Vulnerability History
| Date | High Risk | Low Risk | 
|---|---|---|
| 2024-12-20 | 6 | 0 | 
Audit Report Details
14825
      Lines of Code
    9
      Open
    0
      Resolved
    🚨 High Risk Vulnerabilities
⚠️ Low Risk Vulnerabilities
Vulnerable Code:
1---
2File: /coding/api/__init__.py
3---
4
5from openai import *
6
7
8---
9File: /coding/api/cleaners.py
10---
11
12from detect_secrets.core import scan
13from detect_secrets.settings import default_settings
14
# Fill-in-the-middle (FIM) marker tokens handled by clean_fixes().
# Markers in this list are deleted from the text.
FIM_PREFIXES = ["<fim_prefix>", "[PREFIX]", "<PRE>", "<|fim_begin|>"]
# Markers in this list are deleted from the text as well.
FIM_ENDS = ["<fim_middle>", "[SUFFIX]", "<SUF>", "<|fim_end|>"]
# Markers in this list are rewritten to the "<|fim_hole|>" token.
FIM_HOLES = ["<fim_suffix>"]
18
def clean_fixes(text):
    """Normalise fill-in-the-middle markers found in ``text``.

    Every marker listed in ``FIM_PREFIXES`` and ``FIM_ENDS`` is deleted, and
    every marker listed in ``FIM_HOLES`` is replaced by ``<|fim_hole|>``.

    Args:
        text (str): Prompt text that may contain FIM markers.

    Returns:
        str: The text with the markers removed or rewritten.
    """
    # Prefix markers are stripped first, then end markers, as two groups
    # handled by one loop.
    for marker in (*FIM_PREFIXES, *FIM_ENDS):
        text = text.replace(marker, "")
    for marker in FIM_HOLES:
        text = text.replace(marker, "<|fim_hole|>")
    return text
27
def remove_secret_lines(multiline_string):
    """Drop every line of ``multiline_string`` that detect-secrets flags.

    The scan runs with the default detect-secrets settings, except that the
    ``Base64HighEntropyString`` and ``HexHighEntropyString`` plugins are
    disabled.

    Args:
        multiline_string (str): Text to filter, with lines separated by "\\n".

    Returns:
        str: The remaining lines joined back together with "\\n".
    """
    kept = []
    with default_settings() as settings:
        settings.disable_plugins(
            'Base64HighEntropyString',
            'HexHighEntropyString'
        )
        for line in multiline_string.split('\n'):
            # One finding is enough to discard the line, so stop at the first.
            flagged = any(True for _ in scan.scan_line(line))
            if not flagged:
                kept.append(line)
    return '\n'.join(kept)
53
def remove_generate_prompt(string):
    """
    Cleaner that strips the scaffolding continue.dev wraps around a prompt when running `Generate Code`.

    Each known scaffolding fragment is removed wherever it occurs; fragments
    are processed in the order listed below.

    Args:
        string (str): Prompt text as received.

    Returns:
        str: The prompt without the scaffolding fragments.
    """
    scaffolding = (
        "<|im_start|>user\n",
        "<|im_end|>\n",
        "<|im_start|>assistant\n",
        "Sure! Here\'s the entire rewritten code block:\n```python\n",
    )
    cleaned = string
    for fragment in scaffolding:
        cleaned = cleaned.replace(fragment, "")
    return cleaned
63
64
65---
66File: /coding/api/code.py
67---
68
69# The MIT License (MIT)
70# Copyright © 2021 Yuma Rao
71# Copyright © 2023 Opentensor Foundation
72# Copyright © 2023 Opentensor Technologies Inc
73
74# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
75# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
76# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
77# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
78
79# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
80# the Software.
81
82# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
83# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
84# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
85# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
86# DEALINGS IN THE SOFTWARE.
87
88import bittensor as bt
89from typing import List, Optional, Union, Any, Dict
90from bittensor.subnets import SubnetsAPI
91from coding.protocol import StreamCodeSynapse
92
class CodeAPI(SubnetsAPI):
    """Subnet API client for the "code" subnet (netuid 45)."""

    def __init__(self, wallet: "bt.wallet"):
        """Initialise the base API with ``wallet`` and set the subnet identity."""
        super().__init__(wallet)
        self.netuid = 45
        self.name = "code"

    def prepare_synapse(self, query: str, documents: List[Any]) -> StreamCodeSynapse:
        """Build the synapse that carries ``query`` and ``documents``."""
        return StreamCodeSynapse(query=query, documents=documents)

    def process_responses(
        self, responses: List[Union[StreamCodeSynapse, Any]]
    ) -> List[Any]:
        """Collect the completions of all successful responses.

        Args:
            responses: Responses whose ``dendrite.status_code`` and
                ``completion`` attributes are read.

        Returns:
            The ``completion`` of every response with status code 200, in
            input order. Responses with any other status code are skipped.
        """
        # The previous version did ``return outputs.append(...)`` inside the
        # loop, which returned None as soon as the first success was seen.
        return [
            response.completion
            for response in responses
            if response.dendrite.status_code == 200
        ]
111
112
113
114---
115File: /coding/api/completion.py
116---
117
118import time
119import json
120
121from http import HTTPStatus
122from typing import AsyncGenerator, AsyncIterator, Union
123
124from coding.api.protocol import (
125    ChatCompletionRequest,
126    ChatCompletionResponseStreamChoice,
127    ChatCompletionStreamResponse,
128    DeltaMessage,
129    ErrorResponse,
130    ChatCompletionResponse,
131    CompletionRequest,
132    CompletionResponseStreamChoice,
133    CompletionStreamResponse,
134    CompletionResponse,
135    CompletionResponseChoice
136)
137
def create_streaming_error_response(
        message: str,
        err_type: str = "BadRequestError",
        status_code: HTTPStatus = HTTPStatus.BAD_REQUEST) -> str:
    """Serialise an error as the JSON payload of a streaming event.

    This is a module-level function, so it takes no ``self``: callers in this
    module invoke it with the message only. The earlier signature declared a
    leading ``self`` parameter, which made those calls fail with a TypeError.

    Args:
        message: Human-readable error description.
        err_type: Error type label placed in the payload.
        status_code: HTTP status whose numeric value becomes the error code.

    Returns:
        A JSON string of the form ``{"error": {...}}``.
    """
    error = ErrorResponse(message=message,
                          type=err_type,
                          code=status_code.value)
    return json.dumps({"error": error.model_dump()})
150
async def chat_completion_stream_generator(
        request: ChatCompletionRequest,
        result_generator: AsyncIterator
) -> Union[ErrorResponse, AsyncGenerator[str, None]]:
    """Convert streamed text pieces into chat-completion server-sent events.

    Before the first text piece an extra event carrying only the role of the
    last request message is emitted. Every text piece then becomes one event
    whose delta holds that text. Iteration stops at the first item that is
    not a ``str``. A ``ValueError`` raised while streaming is reported as an
    error event. The stream always ends with ``data: [DONE]``.

    Args:
        request: The chat request; ``model`` and ``messages`` are read.
        result_generator: Async iterator producing the text pieces.

    Yields:
        str: Events formatted as ``data: <json>`` followed by a blank line.
    """
    model_name = request.model
    created_time = int(time.time())
    chunk_object_type = "chat.completion.chunk"
    awaiting_first_piece = True

    try:
        async for piece in result_generator:
            if not isinstance(piece, str):
                # Anything that is not text ends the stream.
                break

            if awaiting_first_piece:
                opening_choice = ChatCompletionResponseStreamChoice(
                    index=0,
                    delta=DeltaMessage(role=request.messages[-1].role),
                    logprobs=None,
                    finish_reason=None)
                opening_chunk = ChatCompletionStreamResponse(
                    id="",
                    object=chunk_object_type,
                    created=created_time,
                    choices=[opening_choice],
                    model=model_name)
                data = opening_chunk.model_dump_json(exclude_unset=True)
                yield f"data: {data}\n\n"
                awaiting_first_piece = False

            content_choice = ChatCompletionResponseStreamChoice(
                index=0,
                delta=DeltaMessage(content=piece),
                logprobs=None,
                finish_reason="stop",
                stop_reason="")
            content_chunk = ChatCompletionStreamResponse(
                id="",
                object=chunk_object_type,
                created=created_time,
                choices=[content_choice],
                model=model_name)
            data = content_chunk.model_dump_json(exclude_unset=True,
                                                 exclude_none=True)
            yield f"data: {data}\n\n"
    except ValueError as e:
        data = create_streaming_error_response(str(e))
        yield f"data: {data}\n\n"
    print("DONE")
    yield "data: [DONE]\n\n"
203        
204
205
async def chat_completion(
        request: ChatCompletionRequest,
        result_generator: AsyncIterator
) -> Union[ErrorResponse, ChatCompletionResponse]:
    """Drain ``result_generator`` and return one non-streaming chat response.

    Args:
        request: The chat request; only ``model`` is read.
        result_generator: Async iterator producing the text pieces.

    Returns:
        A ``ChatCompletionResponse`` with a single choice holding the
        concatenated text.
    """
    pieces = []
    async for chunk in result_generator:
        # The streaming generators in this module stop at the first non-str
        # item; do the same here instead of failing on string concatenation.
        if not isinstance(chunk, str):
            break
        pieces.append(chunk)

    # ChatCompletionResponse.choices expects entries with a ``message`` field,
    # not the ``delta`` used by streaming choices, so the choice is passed as
    # a plain mapping and validated by the response model.
    # NOTE(review): assumes ChatMessage accepts ``role`` and ``content`` and
    # allows the role "assistant" - confirm against coding.schemas.
    return ChatCompletionResponse(
        id="",
        object="chat.completion",
        created=int(time.time()),
        model=request.model,
        choices=[{
            "index": 0,
            "message": {"role": "assistant", "content": "".join(pieces)},
            "finish_reason": "stop",
            "stop_reason": "",
        }])
225
async def completion_stream_generator(
        request: CompletionRequest,
        result_generator: AsyncIterator
) -> Union[ErrorResponse, AsyncGenerator[str, None]]:
    """Convert streamed text pieces into text-completion server-sent events.

    Before the first text piece an extra event with empty text is emitted.
    Every text piece then becomes one event carrying that text. Iteration
    stops at the first item that is not a ``str``. A ``ValueError`` raised
    while streaming is reported as an error event. The stream always ends
    with ``data: [DONE]``.

    Args:
        request: The completion request; only ``model`` is read.
        result_generator: Async iterator producing the text pieces.

    Yields:
        str: Events formatted as ``data: <json>`` followed by a blank line.
    """
    model_name = request.model
    created_time = int(time.time())
    # NOTE(review): the chat chunk label is used for text completions too -
    # confirm this is intended.
    chunk_object_type = "chat.completion.chunk"
    awaiting_first_piece = True

    try:
        async for piece in result_generator:
            if not isinstance(piece, str):
                # Anything that is not text ends the stream.
                break

            if awaiting_first_piece:
                opening_choice = CompletionResponseStreamChoice(
                    index=0,
                    text="",
                    logprobs=None,
                    finish_reason=None)
                opening_chunk = CompletionStreamResponse(
                    choices=[opening_choice],
                    model=model_name)
                data = opening_chunk.model_dump_json(exclude_unset=True)
                yield f"data: {data}\n\n"
                awaiting_first_piece = False

            text_choice = CompletionResponseStreamChoice(
                index=0,
                text=piece,
                logprobs=None,
                finish_reason=None)
            text_chunk = CompletionStreamResponse(
                id="",
                object=chunk_object_type,
                created=created_time,
                choices=[text_choice],
                model=model_name)
            data = text_chunk.model_dump_json(exclude_unset=True,
                                              exclude_none=True)
            yield f"data: {data}\n\n"
    except ValueError as e:
        data = create_streaming_error_response(str(e))
        yield f"data: {data}\n\n"
    yield "data: [DONE]\n\n"
271        
272
async def completion(
        request: CompletionRequest,
        result_generator: AsyncIterator
) -> Union[ErrorResponse, CompletionResponse]:
    """Drain ``result_generator`` and return one non-streaming completion.

    Args:
        request: The completion request; only ``model`` is read.
        result_generator: Async iterator producing the text pieces.

    Returns:
        A ``CompletionResponse`` with a single choice holding the
        concatenated text.
    """
    pieces = []
    async for chunk in result_generator:
        # The streaming generators in this module stop at the first non-str
        # item; do the same here instead of failing on string concatenation.
        if not isinstance(chunk, str):
            break
        pieces.append(chunk)

    return CompletionResponse(
        model=request.model,
        choices=[CompletionResponseChoice(
            index=0,
            text="".join(pieces),
            finish_reason="stop",
            stop_reason="")],
        # ``usage`` is a declared field of CompletionResponse. Token counts
        # are not tracked here, so an empty mapping is passed and the usage
        # model fills in its own defaults.
        usage={})
288
289
290---
291File: /coding/api/get_query_axons.py
292---
293
294# The MIT License (MIT)
295# Copyright © 2021 Yuma Rao
296# Copyright © 2023 Opentensor Foundation
297# Copyright © 2023 Opentensor Technologies Inc
298
299# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
300# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
301# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
302# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
303
304# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
305# the Software.
306
307# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
308# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
309# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
310# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
311# DEALINGS IN THE SOFTWARE.
312import numpy as np
313import random
314import bittensor as bt
315
316
async def ping_uids(dendrite, metagraph, uids, timeout=3):
    """
    Ping the axons of the given UIDs and split the UIDs by outcome.

    Args:
        dendrite (bittensor.dendrite): Dendrite used to send the ping.
        metagraph (bittensor.metagraph): Metagraph whose ``axons`` are indexed by UID.
        uids (list): UIDs to ping.
        timeout (int, optional): Timeout passed to the dendrite call. Defaults to 3.

    Returns:
        tuple: ``(successful_uids, failed_uids)``. A UID is successful when its
        response reports status code 200. If the dendrite call raises, the
        error is logged and every UID is reported as failed.
    """
    axons = [metagraph.axons[uid] for uid in uids]
    successful_uids, failed_uids = [], []
    try:
        responses = await dendrite(
            axons,
            bt.Synapse(),  # TODO: potentially get the synapses available back?
            deserialize=False,
            timeout=timeout,
        )
        for uid, response in zip(uids, responses):
            if response.dendrite.status_code == 200:
                successful_uids.append(uid)
            else:
                failed_uids.append(uid)
    except Exception as e:
        bt.logging.error(f"Dendrite ping failed: {e}")
        # Discard any partial classification: nothing is known to be reachable.
        successful_uids = []
        failed_uids = uids
    bt.logging.debug(f"ping() successful uids: {successful_uids}")
    bt.logging.debug(f"ping() failed uids    : {failed_uids}")
    return successful_uids, failed_uids
357
async def get_query_api_nodes(dendrite, metagraph, n=0.1, timeout=3):
    """
    Select up to three reachable API nodes of the metagraph's subnet.

    Candidates are UIDs that have a positive validator trust and whose stake
    lies above the ``1 - n`` stake quantile. Candidates are pinged and only
    those that answer are kept; if more than three answer, three are picked
    at random.

    Args:
        dendrite (bittensor.dendrite): Dendrite used to ping the candidates.
        metagraph (bittensor.metagraph): The metagraph instance containing network information.
        n (float, optional): The fraction of top nodes to consider based on stake. Defaults to 0.1.
        timeout (int, optional): The timeout in seconds for pinging nodes. Defaults to 3.

    Returns:
        list: UIDs of the available API nodes (at most three).
    """
    bt.logging.debug(
        f"Fetching available API nodes for subnet {metagraph.netuid}"
    )
    vtrust_uids = []
    for uid in metagraph.uids:
        if metagraph.validator_trust[uid] > 0:
            vtrust_uids.append(uid.item())
    stake_threshold = np.quantile(metagraph.S, 1 - n)
    top_uids = np.where(metagraph.S > stake_threshold)[0].tolist()
    init_query_uids = set(top_uids).intersection(set(vtrust_uids))
    ping_result = await ping_uids(
        dendrite, metagraph, list(init_query_uids), timeout=timeout
    )
    query_uids = ping_result[0]
    bt.logging.debug(
        f"Available API node UIDs for subnet {metagraph.netuid}: {query_uids}"
    )
    if len(query_uids) <= 3:
        return query_uids
    return random.sample(query_uids, 3)
390
391
async def get_query_api_axons(
    wallet, metagraph=None, n=0.1, timeout=3, uids=None
):
    """
    Return the axons of the API nodes to query.

    Args:
        wallet (bittensor.wallet): Wallet used to create the dendrite.
        metagraph (bittensor.metagraph, optional): Metagraph to read axons from.
            When omitted, a metagraph for netuid 21 is created.
        n (float, optional): The fraction of top nodes to consider based on stake. Defaults to 0.1.
        timeout (int, optional): The timeout in seconds for pinging nodes. Defaults to 3.
        uids (Union[List[int], int], optional): Explicit UID(s) to use. When
            given, no node discovery or pinging takes place. Defaults to None.

    Returns:
        list: A list of axon objects for the selected UIDs.
    """
    dendrite = bt.dendrite(wallet=wallet)

    if metagraph is None:
        # NOTE(review): the fallback netuid is hard-coded to 21 - confirm it
        # is the intended subnet.
        metagraph = bt.metagraph(netuid=21)

    if uids is None:
        query_uids = await get_query_api_nodes(
            dendrite, metagraph, n=n, timeout=timeout
        )
    elif isinstance(uids, int):
        query_uids = [uids]
    else:
        query_uids = uids
    return [metagraph.axons[uid] for uid in query_uids]
420
421
422
423---
424File: /coding/api/loggers.py
425---
426
427import httpx
428
class CallCountManager:
    """Client for the call-counter endpoint of a statistics API."""

    def __init__(self, url, key):
        """Store the base ``url``, the API ``key`` and the JSON request headers."""
        self.url, self.key = url, key
        self.headers = {"Content-Type": "application/json"}

    async def add(self):
        """Send a GET request to ``<url>/counter/add`` and return the decoded JSON.

        The API key is sent as the ``api_key`` query parameter. A fresh HTTP
        client is opened for the request and closed afterwards. An error
        status in the response raises through ``raise_for_status``.
        """
        query = {"api_key": self.key}
        async with httpx.AsyncClient() as client:
            response = await client.get(
                f"{self.url}/counter/add", params=query, headers=self.headers
            )
            response.raise_for_status()
            return response.json()
442    
443
444
445---
446File: /coding/api/openai.py
447---
448
449import os
450import httpx
451import dotenv
452import logging
453import asyncio
454import argparse
455import bittensor as bt
456from cachetools.func import ttl_cache
457from contextlib import asynccontextmanager
458from fastapi import FastAPI, HTTPException
459from fastapi.responses import JSONResponse, StreamingResponse
460
461from coding.protocol import StreamCodeSynapse
462from coding.api.loggers import CallCountManager
463from coding.api.protocol import CompletionRequest, ChatCompletionRequest
464from coding.api.completion import completion, chat_completion, chat_completion_stream_generator, completion_stream_generator
465from coding.api.cleaners import clean_fixes, remove_secret_lines, remove_generate_prompt
466
# Load variables from a .env file into the environment before they are read below.
dotenv.load_dotenv()

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Command-line options. Note that they are parsed at import time of this module.
parser = argparse.ArgumentParser(
    description="Run the FastAPI server with configurable constants."
)
parser.add_argument(
    "--wallet", type=str, default="test_validator", help="Name of the wallet"
)  # TODO change to validator
parser.add_argument("--hotkey", type=str, default="default", help="Name of the hotkey")
parser.add_argument(
    "--network", type=str, default="ws://127.0.0.1:9946", help="Network address"
)  # TODO change to finney
parser.add_argument(
    "--netuid", type=int, default=1, help="NetUID value"
)  # TODO change to real
parser.add_argument(
    "--stat_api_url", type=str, default=None, help="Url of the statistics API"
)  
parser.add_argument(
    "--stat_api_key", type=str, default=None, help="Key for the statistics API"
)  
args = parser.parse_args()

WALLET_NAME = args.wallet
HOTKEY_NAME = args.hotkey
NETWORK = args.network
NETUID = args.netuid

# For the statistics API the environment variable wins; the command-line
# value is only the fallback when the variable is not set.
STAT_API_URL = os.getenv("STAT_API_URL", args.stat_api_url)
STAT_API_KEY = os.getenv("STAT_API_KEY", args.stat_api_key)
CALL_COUNTER = None

# Call counting is enabled only when both the URL and the key are available.
if STAT_API_URL and STAT_API_KEY:
    CALL_COUNTER = CallCountManager(url=STAT_API_URL, key=STAT_API_KEY) 


# Shared bittensor handles; they stay None until lifespan() assigns them.
subtensor = None
subnet = None
wallet = None
dendrite = None
511
512
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Create the shared network handles on startup and release them on shutdown.

    On entry an HTTP client is attached to ``app.requests_client`` and the
    module-level ``subtensor``, ``subnet``, ``wallet`` and ``dendrite`` are
    initialised from the configured constants. On exit the HTTP client is
    closed.

    Args:
        app: The FastAPI application being started.
    """
    global subtensor, subnet, dendrite, wallet
    app.requests_client = httpx.AsyncClient()
    try:
        subtensor = bt.subtensor(network=NETWORK)
        subnet = subtensor.metagraph(netuid=NETUID)
        wallet = bt.wallet(name=WALLET_NAME, hotkey=HOTKEY_NAME)
        dendrite = bt.dendrite(wallet=wallet)
        yield
    finally:
        # Close the client even when startup or the application body raises,
        # so the connection pool is never leaked.
        await app.requests_client.aclose()
526
527
@ttl_cache(maxsize=100, ttl=60 * 60)
def get_top_miner_uid():
    """Return the index of the largest entry of the metagraph's ``I`` vector.

    The module-level ``subtensor`` and ``subnet`` are re-created from the
    configured network and netuid on every uncached call. The result is
    cached for one hour.

    Returns:
        int: Position of the maximum of ``subnet.I``.
    """
    global subtensor, subnet
    subtensor = bt.subtensor(network=NETWORK)
    subnet = subtensor.metagraph(netuid=NETUID)
    top_index = subnet.I.argmax()
    return int(top_index)
535
536
async def forward(uid, synapse, timeout=25):
    """Send ``synapse`` to the axon registered for ``uid`` in streaming mode.

    Args:
        uid: Index into ``subnet.axons`` selecting the target axon.
        synapse: The synapse to send.
        timeout (int, optional): Timeout passed to the dendrite call. Defaults to 25.

    Returns:
        Whatever the module-level ``dendrite`` returns for the streaming call.
    """
    target_axon = subnet.axons[uid]
    return await dendrite(
        axons=target_axon,
        synapse=synapse,
        deserialize=False,
        timeout=timeout,
        streaming=True,
    )
547
# The application object. The interactive docs are served at the root path
# and the ReDoc page is disabled.
app = FastAPI(
    lifespan=lifespan,
    docs_url="/",
    redoc_url=None,
)
553
554
@app.post(
    "/chat/completions"
)
@app.post("/v1/chat/completions")
async def chat_completions(request: ChatCompletionRequest):
    """Handle a chat completion request.

    The messages, attachments and files of the request are forwarded to UID 0.
    The answer is returned as a server-sent event stream when
    ``request.stream`` is truthy, otherwise as a single JSON document.

    Args:
        request: The validated chat completion request.

    Raises:
        HTTPException: 408 on an HTTP read timeout, 500 on any other failure.
    """
    if CALL_COUNTER:
        # Fire-and-forget: the counter update must not delay the response.
        asyncio.create_task(CALL_COUNTER.add())
    if not request.attachments:
        request.attachments = []
    if not request.files:
        request.files = []
    try:
        generator = await forward(
            0, StreamCodeSynapse(messages=request.messages, attachments=request.attachments, files=request.files, uid=0)
        )
        if request.stream:
            return StreamingResponse(chat_completion_stream_generator(request, generator), media_type="text/event-stream")
        return JSONResponse(content=(await chat_completion(request, generator)).model_dump())
    except httpx.ReadTimeout:
        raise HTTPException(408) from None
    except Exception:
        # Record the failure before hiding the details from the client;
        # previously the exception was discarded without a trace.
        logger.exception("chat completion request failed")
        raise HTTPException(500) from None
578
579
async def collect_async_gen(gen):
    """Exhaust the async iterable ``gen`` and return its items as a list."""
    collected = []
    async for item in gen:
        collected.append(item)
    return collected
582
583
@app.post("/completions")
@app.post("/v1/completions")
async def completions(request: CompletionRequest):
    """Handle a text completion request.

    A list of prompt strings is joined with single spaces. The prompt is then
    cleaned (FIM markers, lines containing secrets, generate-prompt
    scaffolding) and forwarded to UID 0. The answer is returned as a
    server-sent event stream when ``request.stream`` is truthy, otherwise as
    a single JSON document.

    Args:
        request: The validated completion request.

    Raises:
        HTTPException: 400 when the prompt is a list that contains non-string
            items, 408 on an HTTP read timeout, 500 on any other failure.
    """
    if CALL_COUNTER:
        # Fire-and-forget: the counter update must not delay the response.
        asyncio.create_task(CALL_COUNTER.add())
    if isinstance(request.prompt, list):
        # The request model also admits token arrays, which cannot be joined
        # into text; reject them explicitly instead of crashing in join().
        if not all(isinstance(part, str) for part in request.prompt):
            raise HTTPException(400, detail="Only text prompts are supported")
        request.prompt = " ".join(request.prompt)
    # remove any fim prefix/suffixes
    request.prompt = remove_generate_prompt(remove_secret_lines(clean_fixes(request.prompt)))
    try:
        generator = await forward(
            0, StreamCodeSynapse(query=request.prompt, uid=0)
        )

        if request.stream:
            return StreamingResponse(completion_stream_generator(request, generator), media_type="text/event-stream")
        return JSONResponse(content=(await completion(request, generator)).model_dump())
    except httpx.ReadTimeout:
        raise HTTPException(408) from None
    except Exception:
        # Use the module logger (with traceback) rather than print().
        logger.exception("completion request failed")
        raise HTTPException(500) from None
610
611
@app.get("/models")
@app.get("/v1/models")
async def models():
    """Return the identifier of the model served by this API.

    Returning a constant cannot raise, so the former timeout and
    catch-all handlers were unreachable and have been removed.
    """
    return "code"
621
622
if __name__ == "__main__":
    import uvicorn

    # Raise the level of uvicorn's loggers to DEBUG by editing its logging
    # configuration dictionary in place, then start the server.
    log_config = uvicorn.config.LOGGING_CONFIG
    for uvicorn_logger in ("uvicorn", "uvicorn.error", "uvicorn.access"):
        log_config["loggers"][uvicorn_logger]["level"] = "DEBUG"
    uvicorn.run("coding.api.openai:app", host="0.0.0.0", port=9990, reload=False)
631
632
633
634---
635File: /coding/api/protocol.py
636---
637
638import time
639import uuid
640from typing import List, Literal, Optional, Union, Any
641
642from pydantic import (
643    BaseModel,
644    Field,
645)
646from coding.schemas import ChatMessage
647
648
def random_uuid() -> str:
    """Return a freshly generated random UUID as 32 hexadecimal characters."""
    generated = uuid.uuid4()
    return generated.hex
651
class ChatCompletionResponseChoice(BaseModel):
    """One choice entry of a non-streaming chat completion response."""
    index: int
    message: ChatMessage
    # Restricted to "stop" or "length"; None when not provided.
    finish_reason: Optional[Literal["stop", "length"]] = None
    # May be an int or a str; None when not provided.
    stop_reason: Union[None, int, str] = None
657
658
class ChatCompletionResponse(BaseModel):
    """Body of a non-streaming chat completion response."""
    # Defaults to "chatcmpl-" followed by a random hex UUID.
    id: str = Field(default_factory=lambda: f"chatcmpl-{random_uuid()}")
    object: str = "chat.completion"
    # Defaults to the current Unix time in whole seconds.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseChoice]
665
class CompletionResponseChoice(BaseModel):
    """One choice entry of a non-streaming text completion response."""
    index: int
    text: str
    # Restricted to "stop" or "length"; None when not provided.
    finish_reason: Optional[Literal["stop", "length"]] = None
    stop_reason: Union[None, int, str] = Field(
        default=None,
        description=(
            "The stop string or token id that caused the completion "
            "to stop, None if the completion finished for some other reason "
            "including encountering the EOS token"),
    )
677
class ErrorResponse(BaseModel):
    """Error payload: a message, an error type label and a numeric code."""
    object: str = "error"
    message: str
    type: str
    # Optional; None when not provided.
    param: Optional[str] = None
    code: int
684
685
class UsageInfo(BaseModel):
    """Token usage counters; every counter defaults to 0."""
    prompt_tokens: int = 0
    total_tokens: int = 0
    completion_tokens: Optional[int] = 0
690
class ResponseFormat(BaseModel):
    """Requested response format."""
    # type must be "json_object" or "text"
    # The allowed values belong in the annotation. Previously the Literal
    # object itself was assigned as the default of a plain ``str`` field, so
    # nothing was validated and the default was not a string. The field stays
    # optional and now defaults to "text".
    type: Literal["text", "json_object"] = "text"
694
class CompletionRequest(BaseModel):
    """Request body of the text completion endpoints."""
    model: str
    # a string, array of strings, array of tokens, or array of token arrays
    prompt: Union[List[int], List[List[int]], str, List[str]]
    # Streaming is off unless requested.
    stream: Optional[bool] = False
700
class ChatCompletionRequest(BaseModel):
    """Request body of the chat completion endpoints."""
    model: str
    messages: List[ChatMessage]
    # Streaming is on unless disabled.
    stream: Optional[bool] = True
    # Both lists default to empty; None is accepted as well.
    attachments: Union[List[Any], None] = []
    files: Union[List[Any], None] = []
707    
708
709
class CompletionResponse(BaseModel):
    """Body of a non-streaming text completion response."""
    # Defaults to "cmpl-" followed by a random hex UUID.
    id: str = Field(default_factory=lambda: f"cmpl-{random_uuid()}")
    object: str = "text_completion"
    # Defaults to the current Unix time in whole seconds.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[CompletionResponseChoice]
    # Defaults to zeroed counters so a response can be built by callers that
    # do not track token usage; previously omitting it failed validation.
    usage: UsageInfo = Field(default_factory=UsageInfo)
717
718
class CompletionResponseStreamChoice(BaseModel):
    """One choice entry of a streamed text completion chunk."""
    index: int
    text: str
    # Restricted to "stop" or "length"; None when not provided.
    finish_reason: Optional[Literal["stop", "length"]] = None
    stop_reason: Union[None, int, str] = Field(
        default=None,
        description=(
            "The stop string or token id that caused the completion "
            "to stop, None if the completion finished for some other reason "
            "including encountering the EOS token"),
    )
730
731
class CompletionStreamResponse(BaseModel):
    """One chunk of a streamed text completion response."""
    # Defaults to "cmpl-" followed by a random hex UUID.
    id: str = Field(default_factory=lambda: f"cmpl-{random_uuid()}")
    object: str = "text_completion"
    # Defaults to the current Unix time in whole seconds.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[CompletionResponseStreamChoice]
    # Optional; None when not provided.
    usage: Optional[UsageInfo] = Field(default=None)
739
class DeltaMessage(BaseModel):
    """Incremental message fragment; both fields are optional."""
    role: Optional[str] = None
    content: Optional[str] = None
743
744
class ChatCompletionResponseStreamChoice(BaseModel):
    """One choice entry of a streamed chat completion chunk."""
    index: int
    delta: DeltaMessage
    # Restricted to "stop" or "length"; None when not provided.
    finish_reason: Optional[Literal["stop", "length"]] = None
    # May be an int or a str; None when not provided.
    stop_reason: Union[None, int, str] = None
750
751
class ChatCompletionStreamResponse(BaseModel):
    """One chunk of a streamed chat completion response."""
    # Defaults to "chatcmpl-" followed by a random hex UUID.
    id: str = Field(default_factory=lambda: f"chatcmpl-{random_uuid()}")
    object: str = "chat.completion.chunk"
    # Defaults to the current Unix time in whole seconds.
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    choices: List[ChatCompletionResponseStreamChoice]
    # Optional; None when not provided.
    usage: Optional[UsageInfo] = Field(default=None)
759
760
761
762---
763File: /coding/api/testing.py
764---
765
766
767
768
769---
770File: /coding/base/utils/weight_utils.py
771---
772
773import numpy as np
774from typing import Tuple, List
775import bittensor
776
# Largest values of an unsigned 32-bit and an unsigned 16-bit integer
# (2**32 - 1 and 2**16 - 1).
U32_MAX = 4294967295
U16_MAX = 65535
779
def normalize_max_weight(
    x: np.ndarray, limit: float = 0.1
) -> np.ndarray:
    r"""Scales x to sum to 1 while capping large entries so the biggest share stays near ``limit``.
    Args:
        x (:obj:`np.ndarray`):
            Array to be max_value normalized.
        limit: float:
            Max value after normalization.
    Returns:
        y (:obj:`np.ndarray`):
            Normalized x array. A uniform array is returned when x sums to
            zero or when ``len(x) * limit <= 1``.
    """
    epsilon = 1e-7  # For numerical stability after normalization

    # Degenerate input, or a limit that cannot be met: fall back to uniform.
    if x.sum() == 0 or len(x) * limit <= 1:
        return np.ones_like(x) / x.size

    weights = x.copy()
    ascending = np.sort(weights)
    shares = ascending / ascending.sum()

    # Already within the limit: plain normalization is enough.
    if shares.max() <= limit:
        return weights / weights.sum()

    # Running total of the sorted shares.
    running = np.cumsum(shares, 0)
    count = len(ascending)

    # For each sorted position, the share multiplied by the number of entries
    # that come after it.
    tail_mass = np.array(
        [(count - i - 1) * share for i, share in enumerate(shares)]
    )
    # Number of positions whose ratio is below the limit.
    n_values = (shares / (tail_mass + running + epsilon) < limit).sum()

    # Translate that position into an absolute cutoff value.
    cutoff_scale = (limit * running[n_values - 1] - epsilon) / (
        1 - (limit * (count - n_values))
    )
    cutoff = cutoff_scale * ascending.sum()

    # Clip everything above the cutoff, then renormalize.
    weights[weights > cutoff] = cutoff
    return weights / weights.sum()
827
828
def convert_weights_and_uids_for_emit(
    uids: np.ndarray, weights: np.ndarray
) -> Tuple[List[int], List[int]]:
    r"""Converts weights into integer u16 representation, scaled so that the largest weight maps to U16_MAX.
    Args:
        uids (:obj:`np.ndarray,`):
            Array of uids as destinations for passed weights.
        weights (:obj:`np.ndarray,`):
            Array of weights.
    Returns:
        weight_uids (List[int]):
            Uids as a list, restricted to entries whose integer weight is non-zero.
        weight_vals (List[int]):
            Integer weights as a list, aligned with ``weight_uids``.
    Raises:
        ValueError:
            If the lengths differ, or if any weight or uid is negative.
    """
    weights = weights.tolist()
    uids = uids.tolist()

    # Validate the pairing first: the checks below assume aligned inputs.
    if len(uids) != len(weights):
        raise ValueError(
            "Passed weights and uids must have the same length, got {} and {}".format(
                len(uids), len(weights)
            )
        )
    # Empty input has nothing to emit; np.min would raise on an empty list.
    if not weights:
        return [], []
    if np.min(weights) < 0:
        raise ValueError(
            "Passed weight is negative cannot exist on chain {}".format(weights)
        )
    if np.min(uids) < 0:
        raise ValueError("Passed uid is negative cannot exist on chain {}".format(uids))
    if np.sum(weights) == 0:
        return [], []  # Nothing to set on chain.

    max_weight = float(np.max(weights))

    weight_vals = []
    weight_uids = []
    for weight_i, uid_i in zip(weights, uids):
        # Max-upscale (largest weight becomes 1), then convert to the integer
        # representation.
        uint16_val = round(float(weight_i) / max_weight * int(U16_MAX))

        # Entries that round to zero are not emitted.
        if uint16_val != 0:
            weight_vals.append(uint16_val)
            weight_uids.append(uid_i)

    return weight_uids, weight_vals
880
881
def process_weights_for_netuid(
    uids,
    weights: np.ndarray,
    netuid: int,
    subtensor: "bittensor.subtensor",
    metagraph: "bittensor.metagraph" = None,
    exclude_quantile: int = 0,
) -> "tuple[np.ndarray, np.ndarray]":
    """
    Filter and normalize raw weights so they respect the subnet's limits.

    Args:
        uids: Array of uids, indexable by an integer index array and aligned
            position-by-position with ``weights``.
        weights: One-dimensional weights (any array-like; cast to float32).
        netuid: Subnet whose ``min_allowed_weights`` / ``max_weight_limit``
            are queried from ``subtensor``.
        subtensor: Chain interface used for the limits and, when
            ``metagraph`` is None, for fetching the metagraph.
        metagraph: Optional metagraph; fetched from ``subtensor`` when None.
        exclude_quantile: Quantile expressed on the u16 scale; it is divided
            by ``U16_MAX`` before use.

    Returns:
        A ``(uids, weights)`` pair:
        * uniform weights over ``metagraph.n`` uids when there are no positive
          weights or the metagraph is smaller than ``min_allowed_weights``;
        * all ``metagraph.n`` uids with a 1e-5 floor added when there are fewer
          positive weights than ``min_allowed_weights``;
        * otherwise only the positive weights at or above the exclusion
          quantile, passed through ``normalize_max_weight``.
    """
    print("process_weights_for_netuid()")
    print("weights", weights)
    print("netuid", netuid)
    print("subtensor", subtensor)
    print("metagraph", metagraph)

    # Get latest metagraph from chain if metagraph is None.
    if metagraph is None:
        metagraph = subtensor.metagraph(netuid)

    # Cast weights to a float32 array. np.asarray also accepts plain sequences
    # and does not copy when the input is already a float32 ndarray.
    weights = np.asarray(weights, dtype=np.float32)

    # Network configuration parameters from a subtensor.
    # These parameters determine the range of acceptable weights for each neuron.
    quantile = exclude_quantile / U16_MAX
    min_allowed_weights = subtensor.min_allowed_weights(netuid=netuid)
    max_weight_limit = subtensor.max_weight_limit(netuid=netuid)
    print("quantile", quantile)
    print("min_allowed_weights", min_allowed_weights)
    print("max_weight_limit", max_weight_limit)

    # Find all non zero weights.
    # squeeze(axis=1) keeps the index one-dimensional even when exactly one
    # weight is positive; a bare squeeze() would collapse it to a 0-d array and
    # break the len() call and boolean masking further down.
    non_zero_weight_idx = np.argwhere(weights > 0).squeeze(axis=1)
    non_zero_weight_uids = uids[non_zero_weight_idx]
    non_zero_weights = weights[non_zero_weight_idx]
    if non_zero_weights.size == 0 or metagraph.n < min_allowed_weights:
        bittensor.logging.warning("No non-zero weights returning all ones.")
        final_weights = np.ones((metagraph.n)) / metagraph.n
        print("final_weights", final_weights)
        return np.arange(len(final_weights)), final_weights

    elif non_zero_weights.size < min_allowed_weights:
        bittensor.logging.warning(
            "No non-zero weights less then min allowed weight, returning all ones."
        )
        weights = (
            np.ones((metagraph.n)) * 1e-5
        )  # creating minimum even non-zero weights
        weights[non_zero_weight_idx] += non_zero_weights
        print("final_weights", weights)
        normalized_weights = normalize_max_weight(
            x=weights, limit=max_weight_limit
        )
        return np.arange(len(normalized_weights)), normalized_weights

    print("non_zero_weights", non_zero_weights)

    # Compute the exclude quantile and find the weights in the lowest quantile.
    # max_exclude caps the quantile so at least min_allowed_weights survive.
    max_exclude = max(0, len(non_zero_weights) - min_allowed_weights) / len(
        non_zero_weights
    )
    exclude_quantile = min([quantile, max_exclude])
    lowest_quantile = np.quantile(non_zero_weights, exclude_quantile)
    print("max_exclude", max_exclude)
    print("exclude_quantile", exclude_quantile)
    print("lowest_quantile", lowest_quantile)

    # Exclude all weights below the allowed quantile.
    condition = non_zero_weights >= lowest_quantile
    non_zero_weight_uids = non_zero_weight_uids[condition]
    non_zero_weights = non_zero_weights[condition]
    print("non_zero_weight_uids", non_zero_weight_uids)
    print("non_zero_weights", non_zero_weights)

    # Normalize weights and return.
    normalized_weights = normalize_max_weight(
        x=non_zero_weights, limit=max_weight_limit
    )
    print("final_weights", normalized_weights)

    return non_zero_weight_uids, normalized_weights
963
964
965
966---
967File: /coding/base/__init__.py
968---
969
970
971
972
973---
974File: /coding/base/miner.py
975---
976
977# The MIT License (MIT)
978# Copyright © 2024 Yuma Rao
979
980# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
981# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
982# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
983# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
984
985# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
986# the Software.
987
988# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
989# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
990# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
991# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
992# DEALINGS IN THE SOFTWARE.
993
994import time
995import argparse
996import asyncio
997import threading
998import bittensor as bt
999
1000from typing import Union
1001from traceback import print_exception
1002from coding.base.neuron import BaseNeuron
1003from coding.utils.config import add_miner_args
1004from coding.protocol import StreamCodeSynapse, HFModelSynapse
1005
1006
class BaseMinerNeuron(BaseNeuron):
    """
    Base class for Bittensor miners.

    On construction it builds an axon for the wallet and attaches every entry
    of ``self.forward_capabilities`` to it. ``run`` is the blocking main loop;
    it can also be driven from a daemon thread, either explicitly or through
    the context-manager protocol.
    """

    @classmethod
    def add_args(cls, parser: argparse.ArgumentParser):
        """Register the base neuron arguments plus the miner arguments on ``parser``."""
        super().add_args(parser)
        add_miner_args(cls, parser)

    def __init__(self, config=None):
        """
        Initialise the base neuron, warn about permissive blacklist settings
        and build the axon.

        Args:
            config: Optional configuration forwarded to ``BaseNeuron.__init__``.
        """
        super().__init__(config=config)

        # Warn if allowing incoming requests from anyone.
        if not self.config.blacklist.force_validator_permit:
            bt.logging.warning(
                "You are allowing non-validators to send requests to your miner. This is a security risk."
            )
        if self.config.blacklist.allow_non_registered:
            bt.logging.warning(
                "You are allowing non-registered entities to send requests to your miner. This is a security risk."
            )

        # The axon handles request processing, allowing validators to send this miner requests.
        self.axon = bt.axon(wallet=self.wallet, config=self.config)

        # Attach the functions that are called when servicing a request.
        # NOTE(review): `forward_capabilities` is not defined in this class; it
        # is presumably supplied by the concrete miner - confirm. Each entry
        # must provide the keys 'forward', 'blacklist' and 'priority'.
        bt.logging.info("Attaching forward function to miner axon.")
        for forward_capability in self.forward_capabilities:
            self.axon.attach(
                forward_fn=forward_capability['forward'],
                blacklist_fn=forward_capability['blacklist'],
                priority_fn=forward_capability['priority'],
            )
        bt.logging.info(f"Axon created: {self.axon}")

        # Instantiate runners
        self.should_exit: bool = False
        self.is_running: bool = False
        self.thread: threading.Thread = None
        self.lock = asyncio.Lock()

    def run(self):
        """
        Run the miner's main loop until ``should_exit`` is set.

        Steps performed:
        1. ``self.sync()`` (which includes the registration check).
        2. Serve the axon on ``config.netuid`` and start it.
        3. Repeatedly wait, polling once per second, until
           ``config.neuron.epoch_length`` blocks have passed since
           ``metagraph.last_update[self.uid]``; then call ``self.sync()`` and
           increment ``self.step``.

        A ``KeyboardInterrupt`` stops the axon and terminates via
        ``SystemExit``. Any other exception is logged, its traceback is printed
        and ``should_exit`` is set to True; it is not re-raised.
        """

        # Check that miner is registered on the network.
        self.sync()

        # Serve passes the axon information to the network + netuid we are hosting on.
        # This will auto-update if the axon port or external ip have changed.
        bt.logging.info(
            f"Serving miner axon {self.axon} on network: {self.config.subtensor.chain_endpoint} with netuid: {self.config.netuid}"
        )
        self.axon.serve(netuid=self.config.netuid, subtensor=self.subtensor)

        # Start the miner's axon, making it active on the network.
        self.axon.start()

        bt.logging.info(f"Miner starting at block: {self.block}")

        # This loop maintains the miner's operations until intentionally stopped.
        try:
            while not self.should_exit:
                while (
                    self.block - self.metagraph.last_update[self.uid]
                    < self.config.neuron.epoch_length
                ):
                    # Wait before checking again.
                    time.sleep(1)

                    # Check if we should exit.
                    if self.should_exit:
                        break

                # Sync metagraph and potentially set weights.
                self.sync()
                self.step += 1

        # If someone intentionally stops the miner, it'll safely terminate operations.
        except KeyboardInterrupt:
            self.axon.stop()
            bt.logging.success("Miner killed by keyboard interrupt.")
            # `exit()` is an interactive helper installed by the `site` module
            # and is not guaranteed to exist; raise SystemExit directly.
            raise SystemExit

        # In case of unforeseen errors, log the error and flag the miner to stop.
        except Exception as err:
            bt.logging.error("Error during mining", str(err))
            # print_exception writes the traceback to stderr and returns None,
            # so its result must not be handed to the logger.
            print_exception(type(err), err, err.__traceback__)
            self.should_exit = True

    def run_in_background_thread(self):
        """
        Start ``run`` in a daemon thread unless one is already marked running.
        """
        if not self.is_running:
            bt.logging.debug("Starting miner in background thread.")
            self.should_exit = False
            self.thread = threading.Thread(target=self.run, daemon=True)
            self.thread.start()
            self.is_running = True
            bt.logging.debug("Started")

    def stop_run_thread(self):
        """
        Ask the background thread to stop and wait up to 5 seconds for it.
        """
        if self.is_running:
            bt.logging.debug("Stopping miner in background thread.")
            self.should_exit = True
            self.thread.join(5)
            self.is_running = False
            bt.logging.debug("Stopped")

    def __enter__(self):
        """
        Start the miner in a background thread and return ``self`` so the miner
        can be used in a ``with`` statement.
        """
        self.run_in_background_thread()

        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Stop the background thread when leaving the ``with`` block.

        Args:
            exc_type: Type of the exception that ended the block, or None.
            exc_value: The exception instance, or None.
            traceback: Traceback of that exception, or None.

        The arguments are ignored and nothing is returned, so an exception
        raised inside the block is not suppressed.
        """
        self.stop_run_thread()

    def resync_metagraph(self):
        """Sync the metagraph from the subtensor and record the block at which it happened."""
        # bt.logging.info("resync_metagraph()")

        # Sync the metagraph.
        self.metagraph.sync(subtensor=self.subtensor)
        self.last_block_sync = self.block
1175
1176
1177
1178---
1179File: /coding/base/neuron.py
1180---
1181
1182# The MIT License (MIT)
1183# Copyright © 2024 Yuma Rao
1184
1185# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
1186# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
1187# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
1188# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
1189
1190# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
1191# the Software.
1192
1193# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
1194# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1195# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
1196# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1197# DEALINGS IN THE SOFTWARE.
1198
1199import sys
1200import copy
1201import json
1202
1203import bittensor as bt
1204
1205from abc import ABC, abstractmethod
1206
1207# Sync calls set weights and also resyncs the metagraph.
1208from coding.utils.config import check_config, add_args, config
1209from coding.utils.misc import ttl_get_block
1210from coding import __spec_version__ as spec_version
1211
1212from coding.mock import MockSubtensor, MockMetagraph
1213
1214
class BaseNeuron(ABC):
    """
    Abstract base shared by miners and validators.

    Construction builds the wallet, subtensor and metagraph (or their mock
    counterparts when ``config.mock`` is set), verifies that the hotkey is
    registered and looks up the neuron's uid. ``sync`` then drives metagraph
    resyncs and weight setting based on ``config.neuron.epoch_length``.
    """

    # Populated in __init__.
    subtensor: "bt.subtensor"
    wallet: "bt.wallet"
    metagraph: "bt.metagraph"
    spec_version: int = spec_version

    @classmethod
    def check_config(cls, config: "bt.Config"):
        """Delegate to the module-level ``check_config`` helper."""
        check_config(cls, config)

    @classmethod
    def add_args(cls, parser):
        """Delegate to the module-level ``add_args`` helper."""
        add_args(cls, parser)

    @classmethod
    def _config(cls):
        """Return the configuration produced by the module-level ``config`` helper."""
        return config(cls)

    @property
    def block(self):
        """Current block as reported by ``ttl_get_block``."""
        return ttl_get_block(self)

    def __init__(self, config=None):
        """
        Merge the configuration, set up logging and create the chain objects.

        Args:
            config: Optional overrides merged on top of this class's own
                config; when falsy, ``BaseNeuron._config()`` is used instead.
        """
        print("start", flush=True)
        overrides = copy.deepcopy(config or BaseNeuron._config())
        self.config = self._config()
        self.config.merge(overrides)
        self.check_config(self.config)

        # Logging is configured from the merged config and its directory.
        bt.logging(config=self.config, logging_dir=self.config.full_path)

        # Device string taken from the config (e.g. cuda:0).
        self.device = self.config.neuron.device

        # Record the effective configuration.
        bt.logging.info(self.config)

        # Core Bittensor objects used to talk to the network.
        bt.logging.info("Setting up bittensor objects.")

        if not self.config.mock:
            self.wallet = bt.wallet(config=self.config)
            self.subtensor = bt.subtensor(config=self.config)
            self.metagraph = self.subtensor.metagraph(self.config.netuid)
        else:
            self.wallet = bt.MockWallet(config=self.config)
            self.subtensor = MockSubtensor(self.config.netuid, wallet=self.wallet)
            self.metagraph = MockMetagraph(netuid=self.config.netuid, subtensor=self.subtensor)

        bt.logging.info(f"Wallet: {self.wallet}")
        bt.logging.info(f"Subtensor: {self.subtensor}")
        bt.logging.info(f"Metagraph: {self.metagraph}")

        # Stop here if the hotkey is not registered on the subnet.
        self.check_registered()

        # The uid is the position of our hotkey in the metagraph.
        hotkey_address = self.wallet.hotkey.ss58_address
        self.uid = self.metagraph.hotkeys.index(hotkey_address)
        bt.logging.info(
            f"Running neuron on subnet: {self.config.netuid} with uid {self.uid} using network: {self.subtensor.chain_endpoint}"
        )
        self.last_block_sync = self.block
        self.step = 0

    @abstractmethod
    def forward(self, synapse: bt.Synapse) -> bt.Synapse:
        ...

    @abstractmethod
    def run(self):
        ...

    def sync(self):
        """
        Re-check registration, resync the metagraph and set weights when the
        respective ``should_*`` predicate says so, then save state.
        """
        # The hotkey must still be registered on the network.
        self.check_registered()

        if self.should_sync_metagraph():
            self.resync_metagraph()

        if self.should_set_weights():
            self.set_weights()

        # State is saved on every sync.
        self.save_state()

    def check_registered(self):
        """
        Exit the process when the wallet's hotkey is not registered on the
        configured netuid. A ``JSONDecodeError`` raised by the lookup is logged
        and otherwise ignored.
        """
        try:
            is_registered = self.subtensor.is_hotkey_registered(
                netuid=self.config.netuid,
                hotkey_ss58=self.wallet.hotkey.ss58_address,
            )
            if not is_registered:
                bt.logging.error(
                    f"Wallet: {self.wallet} is not registered on netuid {self.config.netuid}."
                    f" Please register the hotkey using `btcli subnets register` before trying again"
                )
                sys.exit()
        except json.decoder.JSONDecodeError:
            # Deliberately best-effort: report the problem and carry on.
            bt.logging.error(
                f"JSONDecodeError encountered while checking registration for wallet: {self.wallet} on netuid {self.config.netuid}."
            )

    def should_sync_metagraph(self):
        """
        Return True once more than ``epoch_length`` blocks have passed since
        the last recorded metagraph sync.
        """
        blocks_since_sync = self.block - self.last_block_sync
        return blocks_since_sync > self.config.neuron.epoch_length

    def should_set_weights(self) -> bool:
        """
        Decide whether weights should be set during this sync.

        Returns False on the first step, when weight setting is disabled in the
        config, when this uid has no validator permit, or when the instance has
        no ``set_weights`` method. Otherwise returns whether more than
        ``epoch_length`` blocks have passed since this uid's last update.
        """
        # Never on initialization, and never when disabled by configuration.
        if self.step == 0 or self.config.neuron.disable_set_weights:
            return False

        # Only neurons holding a validator permit and actually implementing
        # set_weights (i.e. running validator code) may set weights.
        has_permit = self.metagraph.validator_permit[self.uid]
        if not (has_permit and hasattr(self, "set_weights")):
            return False

        blocks_since_update = self.block - self.metagraph.last_update[self.uid]
        return blocks_since_update > self.config.neuron.epoch_length

    def save_state(self):
        """No-op hook; subclasses may override it to persist state."""
        pass

    def load_state(self):
        """Hook for restoring state; the base implementation only logs a hint."""
        bt.logging.debug(
            "load_state() not implemented for this neuron. You can implement this function to load model checkpoints or other useful data."
        )
1365
1366
1367
1368---
1369File: /coding/base/validator.py
1370---
1371
1372# The MIT License (MIT)
1373# Copyright © 2024 Yuma Rao
1374
1375# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
1376# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
1377# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
1378# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
1379
1380# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
1381# the Software.
1382
1383# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
1384# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
1385# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
1386# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
1387# DEALINGS IN THE SOFTWARE.
1388
1389import os
1390import sys
1391import copy
1392import asyncio
1393import argparse
1394import threading
1395import bittensor as bt
1396import numpy as np
1397
1398from traceback import print_exception
1399
1400from coding.mock import MockDendrite
1401from coding.base.neuron import BaseNeuron
1402from coding.utils.config import add_validator_args
1403from coding.utils.exceptions import MaxRetryError
1404from coding.utils.uids import get_hotkey_from_uid, get_uid_from_hotkey
1405
1406class BaseValidatorNeuron(BaseNeuron):
1407    """
1408    Base class for Bittensor validators. Your validator should inherit from this class.
1409    """
1410
1411    @classmethod
1412    def add_args(cls, parser: argparse.ArgumentParser):
1413        super().add_args(parser)
1414        add_validator_args(cls, parser)
1415
1416    def __init__(self, config=None):
1417        super().__init__(config=config)
1418        self.load_state()
1419        # Save a copy of the hotkeys to local memory.
1420        self.hotkeys = copy.deepcopy(self.metagraph.hotkeys)
1421
1422        # Dendrite lets us send messages to other nodes (axons) in the network.
1423        if self.config.mock:
1424            self.dendrite = MockDendrite(wallet=self.wallet)
1425        else:
1426            self.dendrite = bt.dendrite(wallet=self.wallet)
1427        bt.logging.info(f"Dendrite: {self.dendrite}")
1428
1429        # Set up initial scoring weights for validation
1430        bt.logging.info("Building validation weights.")
1431        self.scores = np.zeros(
1432            self.metagraph.n
1433        )
1434        # Init sync with the network. Updates the metagraph.
1435        self.sync()
1436
1437        # Serve axon to enable external connections.
1438        if not self.config.neuron.axon_off:
1439            self.serve_axon()
1440        else:
1441            bt.logging.warning("axon off, not serving ip to chain.")
1442
1443        # Create asyncio event loop to manage async tasks.
1444        self.loop = asyncio.get_event_loop()
1445
1446        # Instantiate runners
1447        self.should_exit: bool = False
1448        self.is_running: bool = False
1449        self.thread: threading.Thread = None
1450        self.lock = asyncio.Lock()
1451
1452    def serve_axon(self):
1453        """Serve axon to enable external connections."""
1454
1455        bt.logging.info("serving ip to chain...")
1456        try:
1457            self.axon = bt.axon(wallet=self.wallet, config=self.config)
1458
1459            try:
1460                self.axon.attach(
1461                    forward_fn=self._forward,
1462                    blacklist_fn=self.blacklist,
1463                    priority_fn=self.priority,
1464                )
1465                self.axon.serve(
1466                    netuid=self.config.netuid,
1467                    subtensor=self.subtensor,
1468                )
1469            except Exception as e:
1470                bt.logging.error(f"Failed to serve Axon with exception: {e}")
1471
1472        except Exception as e:
1473            bt.logging.error(f"Failed to create Axon initialize with exception: {e}")
1474
1475    def run(self):
1476        """
1477        Initiates and manages the main loop for the miner on the Bittensor network. The main loop handles graceful shutdown on keyboard interrupts and logs unforeseen errors.
1478
1479        This function performs the following primary tasks:
1480        1. Check for registration on the Bittensor network.
1481        2. Continuously forwards queries to the miners on the network, rewarding their responses and updating the scores accordingly.
1482        3. Periodically resynchronizes with the chain; updating the metagraph with the latest network state and setting weights.
1483
1484        The essence of the validator's operations is in the forward function, which is called every step. The forward function is responsible for querying the network and scoring the responses.
1485
1486        Note:
1487            - The function leverages the global configurations set during the initialization of the miner.
1488            - The miner's axon serves as its interface to the Bittensor network, handling incoming and outgoing requests.
1489
1490        Raises:
1491            KeyboardInterrupt: If the miner is stopped by a manual interruption.
1492            Exception: For unforeseen errors during the miner's operation, which are logged for diagnosis.
1493        """
1494
1495        # Check that validator is registered on the network.
1496        
1497        try:
1498            self.sync()
1499        except Exception as e: # Broken pipe handling 
1500            bt.logging.error("Error while syncing, killing self to restart", str(e))
1501            bt.logging.debug(print_exception(type(e), e, e.__traceback__))
1502            sys.exit(1)
1503        if not self.config.neuron.axon_off:
1504            try:
1505                bt.logging.info(
1506                    f"Running validator {self.axon} on network: {self.config.subtensor.chain_endpoint} with netuid: {self.config.netuid}"
1507                )
1508                # serve the axon
1509                self.axon.serve(netuid=self.config.netuid, subtensor=self.subtensor)
1510                self.axon.start()
1511            except Exception as e:
1512                bt.logging.error(f"Failed to serve and then start Axon with exception: {e}")
1513        else:
1514            bt.logging.info(
1515                f"Running validator on network: {self.config.subtensor.chain_endpoint} with netuid: {self.config.netuid}"
1516            )
1517
1518        bt.logging.info(f"Validator starting at block: {self.block}")
1519
1520        # This loop maintains the validator's operations until intentionally stopped.
1521        try:
1522            while True:
1523                bt.logging.info(f"step({self.step}) block({self.block})")
1524
1525                forward_timeout = self.config.neuron.forward_max_time 
1526                try:
1527                    tasks = [self.loop.create_task(asyncio.run(self.forward(synapse=None))) for _ in range(self.config.neuron.num_concurrent_forwards)]
1528                    self.loop.run_until_complete(
1529                        asyncio.wait_for(asyncio.gather(*tasks), timeout=forward_timeout)
1530                    )
1531                except MaxRetryError as e:
1532                    bt.logging.error(f"MaxRetryError: {e}")
1533                    continue
1534                except asyncio.TimeoutError as e:
1535                    bt.logging.error(
1536                        f"Forward timeout: Task execution exceeded {forward_timeout} seconds and was cancelled.: {e}"
1537                    )
1538                    continue
1539                except Exception as e: # TODO this wasnt here previously, but any errors were cancelling the forward loop so i added it
1540                    bt.logging.error("Error during validation", str(e))
1541                    bt.logging.debug(print_exception(type(e), e, e.__traceback__))
1542                    sys.exit(1)
1543
1544                # Check if we should exit.
1545                if self.should_exit:
1546                    break
1547
1548                # Sync metagraph and potentially set weights.
1549                self.sync()
1550                if self.step is None:
1551                    self.step = 0
1552                self.step += 1
1553
1554        # If someone intentionally stops the validator, it'll safely terminate operations.
1555        except KeyboardInterrupt:
1556            self.axon.stop()
1557            bt.logging.success("Validator killed by keyboard interrupt.")
1558            sys.exit()
1559
1560        # In case of unforeseen errors, the validator will log the error and quit
1561        except Exception as err:
1562            bt.logging.error("Error during validation", str(err))
1563            bt.logging.debug(print_exception(type(err), err, err.__traceback__))
1564            # self.should_exit = True
1565            sys.exit()
1566            
1567
1568    def run_in_background_thread(self):
1569        """
1570        Starts the validator's operations in a background thread upon entering the context.
1571        This method facilitates the use of the validator in a 'with' statement.
1572        """
1573        if not self.is_running:
1574            bt.logging.debug("Starting validator in background thread.")
1575            self.should_exit = False
1576            self.thread = threading.Thread(target=self.run, daemon=True)
1577            self.thread.start()
1578            self.is_running = True
1579            bt.logging.debug("Started")
1580
1581    def stop_run_thread(self):
1582        """
1583        Stops the validator's operations that are running in the background thread.
1584        """
1585        if self.is_running:
1586            bt.logging.debug("Stopping validator in background thread.")
1587            self.should_exit = True
1588            self.thread.join(5)
1589            self.is_running = False
1590            bt.logging.debug("Stopped")
1591
1592    def __enter__(self):
1593        self.run_in_background_thread()
1594        return self
1595
1596    def __exit__(self, exc_type, exc_value, traceback):
1597        """
1598        Stops the validator's background operations upon exiting the context.
1599        This method facilitates the use of the validator in a 'with' statement.
1600
1601        Args:
1602            exc_type: The type of the exception that caused the context to be exited.
1603                      None if the context was exited without an exception.
1604            exc_value: The instance of the exception that caused the context to be exited.
1605                       None if the context was exited without an exception.
1606            traceback: A traceback object encoding the stack trace.
1607                       None if the context was exited without an exception.
1608        """
1609        if self.is_running:
1610            bt.logging.debug("Stopping validator in background thread.")
1611            self.should_exit = True
1612            self.thread.join(5)
1613            self.is_running = False
1614            bt.logging.debug("Stopped")
1615
1616    def set_weights(self):
1617        """
1618        Sets the validator weights to the metagraph hotkeys based on the scores it has received from the miners. The weights determine the trust and incentive level the validator assigns to miner nodes on the network.
1619        """
1620        # check to be sure self.scores is not all 0's
1621        if np.all(self.scores == 0):
1622            bt.logging.warning("self.scores is all 0's, skipping set_weights.")
1623            return
1624        # Check if self.scores contains any NaN values and log a warning if it does.
1625        for _ in range(1):
1626            raw_weights = np.divide(self.scores, np.sum(self.scores, axis=0))
1627
1628            # Process the raw weights to final_weights via subtensor limitations.
1629            (
1630                processed_weight_uids,
1631                processed_weights,
1632            ) = bt.utils.weight_utils.process_weights_for_netuid(
1633                uids=self.metagraph.uids,
1634                weights=raw_weights,
1635                netuid=self.config.netuid,
1636                subtensor=self.subtensor,
1637                metagraph=self.metagraph,
1638            )
1639            print("processed_weights", processed_weights)
1640            print("processed_weight_uids", processed_weight_uids)
1641
1642            # Convert to uint16 weights and uids.
1643            (
1644                uint_uids,
1645                uint_weights,
1646            ) = bt.utils.weight_utils.convert_weights_and_uids_for_emit(
1647                uids=processed_weight_uids, weights=processed_weights
1648            )
1649            print("uint_weights", uint_weights)
1650            print("uint_uids", uint_uids)
1651            # Set the weights on chain via our subtensor connection.
1652            result, msg = self.subtensor.set_weights(
1653                wallet=self.wallet,
1654                netuid=self.config.netuid,
1655                uids=uint_uids,
1656                weights=uint_weights,
1657                wait_for_finalization=False,
1658                wait_for_inclusion=False,
1659                version_key=self.spec_version,
1660            )
1661            if result is True:
1662                bt.logging.info("set_weights on chain successfully!")
1663                return 
1664            else:
1665                bt.logging.error(f"set_weights failed {msg}")
1666
1667    def resync_metagraph(self):
1668        """Resyncs the metagraph and updates the hotkeys and moving averages based on the new metagraph."""
1669        bt.logging.info("resync_metagraph()")
1670
1671        # Copies state of metagraph before syncing.
1672        previous_metagraph = copy.deepcopy(self.metagraph)
1673
1674        # Sync the metagraph.
1675        self.metagraph.sync(subtensor=self.subtensor)
1676
1677        # Check if the metagraph axon info has changed.
1678        if previous_metagraph.axons == self.metagraph.axons:
1679            return
1680
1681        bt.logging.info(
1682            "Metagraph updated, re-syncing hotkeys, dendrite pool and moving averages"
1683        )
1684        # Zero out all hotkeys that have been replaced.
1685        for uid, hotkey in enumerate(self.hotkeys):
1686            if hotkey != self.metagraph.hotkeys[uid]:
1687                self.scores[uid] = 0  # hotkey has been replaced
1688
1689        # Check to see if the metagraph has changed size.
1690        # If so, we need to add new hotkeys and moving averages.
1691        if len(self.hotkeys) < len(self.metagraph.hotkeys):
1692            # Update the size of the moving average scores.
1693            new_moving_average = np.zeros((self.metagraph.n))
1694            min_len = min(len(self.hotkeys), len(self.scores))
1695            new_moving_average[:min_len] = self.scores[:min_len]
1696            self.scores = new_moving_average
1697
1698        # Update the hotkeys.
1699        self.hotkeys = copy.deepcopy(self.metagraph.hotkeys)
1700
1701    def update_scores(self):
1702        """Performs exponential moving average on the scores based on the rewards received from the miners."""
1703        if not self.finetune_results:
1704            return
1705        latest_competition_id = max(self.finetune_results.keys())
1706        finetune_scores = np.zeros(self.metagraph.n)
1707        for tracker in self.finetune_results[latest_competition_id].trackers:
1708            finetune_scores[tracker.uid] = tracker.score
1709        
1710        max_score = np.max(finetune_scores)
1711        threshold = max_score - 0.1  # within 0.1 of max score
1712        finetune_scores[finetune_scores < threshold] = 0
1713        self.scores = finetune_scores
1714        bt.logging.info(f"Updated moving avg scores: {self.scores}")
1715
1716    def save_state(self):
1717        """Saves the state of the validator to a file."""
1718        bt.logging.info("Saving validator state.")
1719
1720        # Convert finetune_results to a numpy array of tuples for saving
1721        finetune_items = np.array(list(self.finetune_results.items()), dtype=object)
1722
1723        # Save the state of the validator to file.
1724        np.savez(
1725            self.config.neuron.full_path + "/state.npz",
1726            step=self.step,
1727            scores=self.scores,
1728            hotkeys=self.hotkeys,
1729            finetune_items=finetune_items,
1730        )
1731
1732    def load_state(self):
1733        """Loads the state of the validator from a file."""
1734        bt.logging.info("Loading validator state.")
1735
1736        state_path = self.config.neuron.full_path + "/state.npz"
1737        
1738        # Check if the state file exists before loading.
1739        if not os.path.exists(state_path):
1740            bt.warning("State file not found. Loading default state.")
1741            self.step = None
1742            self.scores = None
1743            self.hotkeys = None
1744            self.finetune_results = {}
1745            return
1746
1747        # Load the state of the validator from file.
1748        state = np.load(state_path, allow_pickle=True)
1749        
1750        # Set attributes, using default values if they don't exist in the state file.
1751        self.step = state["step"].item() if "step" in state else None
1752        self.scores = state["scores"] if "scores" in state else None
1753        self.hotkeys = state["hotkeys"] if "hotkeys" in state else None
1754        
1755        # Convert finetune_items back to dictionary
1756        self.finetune_results = {}
1757        if "finetune_items" in state:
1758            for key, value in state["finetune_items"]:
1759                self.finetune_results[key] = value
1760    
1761    
1762    
1763
1764
1765
1766---
1767File: /coding/datasets/prompts/bigcodebench.py
1768---
1769
1770DATA_SYNTH_PROMPT = """
1771Based on the following simple example, write more complex scenarios and invoke multiple Python libraries 
1772to solve each problem.
1773The written intent should align with a more specific and practical scenario, but should still be easy to 
1774do functional correctness assertion.
1775For each scenario, write a single Python function with the rewritten intent.
1776Please include requirements and terminal-based input-output examples in the function docstring.
1777The function should contain complex logic like if-else statements and loops.
1778You have to use more than three Python libraries for a scenario. Write imports and variable definitions 
1779outside the function.
1780Try to avoid using web APIs if possible.
1781If there are any constants (e.g. strings and numeric values) used in the functions, you need to declare 
1782them before the function.
1783If data is used, you need to provide sample data in the comment.
1784Try to return values for correctness assertion.
1785Each programming scenario and intent should be separated by a newline.
1786Generate two examples with two scenarios from the following simple example:
1787```python
1788def count_char(char, word):
1789    \"\"\"Counts the characters in word\"\"\"
1790    return word.count(char) # If you want to do it manually try a for loop
1791```
1792
1793
1794Scenario 1:
1795```python
1796import re
1797from collections import Counter
1798from itertools import chain
1799import pandas as pd
1800import numpy as np
1801import random
1802import string
1803
1804# Constants
1805COMMON_WORDS = ["the", "be", "to", "of", "and", "a", "in", "that", "have", "I"]
1806THRESHOLD_FREQUENCY = 5
1807
1808
1809def analyze_text_corpus(corpus):
1810    \"\"\"
1811    Analyzes a list of text documents for word frequency, rare words, and token length statistics.
1812    
1813    Parameters:
1814        - corpus (List[str]): A list of text documents, where each document is a single string.
1815        
1816    Requirements:
1817        - re
1818        - collections
1819        - itertools
1820        - pandas
1821        - numpy
1822        - random
1823        - string
1824
1825    Example:
1826    >>> corpus = [
1827    ...     "The quick brown fox jumps over the lazy dog.",
1828    ...     "To be or not to be, that is the question.",
1829    ...     "A journey of a thousand miles begins with a single step."
1830    ... ]
1831    >>> result = analyze_text_corpus(corpus)
1832    >>> print(result)
1833    {
1834        'most_common_words': [('the', 3), ('be', 2)],
1835        'rare_words': ['journey', 'thousand', 'begins'],
1836        'token_length_stats': {
1837            'mean': 4.0,
1838            'std_dev': 1.58,
1839            'median': 4
1840        }
1841    }
1842    
1843    Returns:
1844        dict: A dictionary containing the most common words, rare words, and token length statistics.
1845    \"\"\"
1846    
1847    # Tokenize and filter common words
1848    all_tokens = [re.findall(r'\b\w+\b', doc.lower()) for doc in corpus]
1849    flattened_tokens = list(chain.from_iterable(all_tokens))
1850    filtered_tokens = [word for word in flattened_tokens if word not in COMMON_WORDS]
1851
1852    # Word frequency analysis
1853    word_counts = Counter(filtered_tokens)
1854    most_common_words = word_counts.most_common(5)
1855    rare_words = [word for word, count in word_counts.items() if count < THRESHOLD_FREQUENCY]
1856    
1857    # Token length analysis
1858    token_lengths = [len(token) for token in flattened_tokens]
1859    token_length_series = pd.Series(token_lengths)
1860    token_length_stats = {
1861        'mean': np.round(token_length_series.mean(), 2),
1862        'std_dev': np.round(token_length_series.std(), 2),
1863        'median': int(token_length_series.median())
1864    }
1865    
1866    return {
1867        'most_common_words': most_common_words,
1868        'rare_words': rare_words,
1869        'token_length_stats': token_length_stats
1870    }
1871```
1872Scenario 2:
1873```python
1874import re
1875from collections import Counter
1876from itertools import chain
1877import pandas as pd
1878import numpy as np
1879import random
1880import string
1881# Sample dataset for product data analysis
1882# Commented data format for input to function
1883# products = [
1884#     {"name": "Laptop", "price": 899.99, "category": "Electronics"},
1885#     {"name": "Book", "price": 14.99, "category": "Education"},
1886#     {"name": "Smartphone", "price": 699.99, "category": "Electronics"},
1887#     {"name": "Pen", "price": 1.99, "category": "Stationery"},
1888#     {"name": "Notebook", "price": 2.99, "category": "Stationery"},
1889#     {"name": "Headphones", "price": 199.99, "category": "Electronics"},
1890# ]
1891
1892def product_category_statistics(products):
1893    \"\"\"
1894    Processes product information to analyze average prices, identify top categories,
1895    and group products by category based on price ranges.
1896    
1897    Parameters:
1898        - products (List[dict]): A list of dictionaries with keys 'name', 'price', and 'category'
1899        
1900    Requirements:
1901        - collections
1902        - pandas
1903        - numpy
1904        - random
1905        - string
1906
1907    Example:
1908    >>> products = [
1909    ...     {"name": "Laptop", "price": 899.99, "category": "Electronics"},
1910    ...     {"name": "Book", "price": 14.99, "category": "Education"},
1911    ...     {"name": "Smartphone", "price": 699.99, "category": "Electronics"},
1912    ...     {"name": "Pen", "price": 1.99, "category": "Stationery"},
1913    ...     {"name": "Notebook", "price": 2.99, "category": "Stationery"},
1914    ...     {"name": "Headphones", "price": 199.99, "category": "Electronics"},
1915    ... ]
1916    >>> result = product_category_statistics(products)
1917    >>> print(result)
1918    {
1919        'average_price_by_category': {'Electronics': 599.99, 'Education': 14.99, 'Stationery': 2.49},
1920        'top_category': 'Electronics',
1921        'products_in_price_ranges': {
1922            'low': ['Pen', 'Notebook'],
1923            'mid': ['Book', 'Headphones'],
1924            'high': ['Smartphone', 'Laptop']
1925        }
1926    }
1927    
1928    Returns:
1929        dict: A dictionary containing average prices, the top category, and products grouped by price ranges.
1930    \"\"\"
1931
1932    # DataFrame creation
1933    df = pd.DataFrame(products)
1934
1935    # Average price by category
1936    avg_price_by_category = df.groupby("category")["price"].mean().round(2).to_dict()
1937    
1938    # Top category by product count
1939    category_counts = Counter(df['category'])
1940    top_category = category_counts.most_common(1)[0][0]
1941
1942    # Price range grouping
1943    price_ranges = {'low': [], 'mid': [], 'high': []}
1944    for _, row in df.iterrows():
1945        if row["price"] < 10:
1946            price_ranges['low'].append(row["name"])
1947        elif 10 <= row["price"] < 100:
1948            price_ranges['mid'].append(row["name"])
1949        else:
1950            price_ranges['high'].append(row["name"])
1951
1952    return {
1953        'average_price_by_category': avg_price_by_category,
1954        'top_category': top_category,
1955        'products_in_price_ranges': price_ranges
1956    }
1957```
1958
1959Above is the illustration.
1960
1961Generate five complex scenarios based on the following simple example:
1962"""
1963
1964
1965
1966---
1967File: /coding/datasets/__init__.py
1968---
1969
1970from .base import Dataset
1971
1972from .bigcodebench import BigCodeBenchDataset
1973from .thestack import TheStackDataset
1974from .pip import PipDataset
1975from .swe import SWEBenchDataset
1976
class DatasetManager:
    """Creates the available datasets on first use and caches them by dataset name."""

    def __init__(self, config=None):
        self.config = config
        # Filled in lazily by the ``datasets`` property.
        self._datasets = None

    @property
    def datasets(self):
        """Return the name -> dataset mapping, instantiating the datasets on first access."""
        if self._datasets is not None:
            return self._datasets

        dataset_classes = (TheStackDataset, PipDataset, SWEBenchDataset)
        self._datasets = {cls.name: cls() for cls in dataset_classes}
        return self._datasets
1991
1992
1993
1994---
1995File: /coding/datasets/base.py
1996---
1997
1998# The MIT License (MIT)
1999# Copyright © 2024 Yuma Rao
2000# Copyright © 2023 Opentensor Foundation
2001# Copyright © 2024 Macrocosmos
2002
2003# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
2004# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
2005# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
2006# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
2007
2008# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
2009# the Software.
2010
2011# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
2012# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
2013# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
2014# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2015# DEALINGS IN THE SOFTWARE.
2016
2017import time
2018import random
2019import functools
2020from abc import ABC, abstractmethod
2021from typing import Dict
2022import bittensor as bt
2023
2024from coding.schemas.context import Context
2025from coding.helpers.selector import Selector
2026from coding.utils.exceptions import MaxRetryError
2027
class Dataset(ABC):
    """Base class for datasets.

    Subclasses implement ``search``, ``random`` and ``get``. ``next`` calls one
    of them repeatedly until it returns a truthy ``dict`` and wraps that dict
    in a ``Context``.
    """

    # Maximum number of fetch attempts ``next`` makes before giving up.
    max_tries: int = 10

    @abstractmethod
    def search(self, name):
        ...

    @abstractmethod
    def random(self, name):
        ...

    @abstractmethod
    def get(self, name):
        ...

    def next(
        self, method: str = "random", selector: Selector = Selector(), **kwargs
    ) -> Context:
        """Fetch one sample with the given method and return it as a ``Context``.

        Args:
            method: Which fetcher to use: ``"random"``, ``"search"`` or ``"get"``.
            selector: Passed to the fetcher as ``selector=``.
                NOTE(review): the default instance is created once at class
                definition time and shared by every call that omits it.
            **kwargs: Forwarded unchanged to the fetcher and recorded in the stats.

        Returns:
            A ``Context`` built from the fetched dict, with ``source`` set to
            the class name and ``stats`` describing the fetch.

        Raises:
            ValueError: If ``method`` is not one of the known fetch methods.
            MaxRetryError: If all ``max_tries`` attempts returned a falsy result.
        """
        fetchers = {
            "random": self.random,
            "search": self.search,
            "get": self.get,
        }
        if method not in fetchers:
            raise ValueError(f"Unknown dataset get method {method!r}")
        fetch = fetchers[method]

        t0 = time.time()
        info = None

        # Exactly ``max_tries`` attempts: the previous counter gave up one
        # attempt early while still reporting a try as remaining.
        for tries in range(1, self.max_tries + 1):
            info = fetch(selector=selector, **kwargs)
            if info:
                break

            remaining = self.max_tries - tries
            if remaining > 0:
                bt.logging.debug(
                    f"Could not find any samples which meet {self.__class__.__name__} requirements after {tries} tries. Retrying... ({remaining} tries remaining.)"
                )
        else:
            raise MaxRetryError(
                f"Could not find any samples which meet {self.__class__.__name__} requirements after {self.max_tries} tries."
            )

        info["source"] = self.__class__.__name__
        info["stats"] = {
            "fetch_time": time.time() - t0,
            "num_tries": tries,
            "fetch_method": method,
            "next_kwargs": kwargs,
        }
        return Context(**info)
2083
2084
2085---
2086File: /coding/datasets/bigcodebench.py
2087---
2088
2089import re
2090import os
2091import bittensor as bt
2092from pydantic import BaseModel
2093from datasets import load_dataset
2094from langchain_openai import ChatOpenAI
2095
2096from .base import Dataset
2097from .prompts.bigcodebench import DATA_SYNTH_PROMPT
2098
class BigCodeBenchDataset(Dataset):
    """Dataset of LLM-synthesized Python scenarios seeded from an instruction dataset.

    Each seed row is appended to ``DATA_SYNTH_PROMPT`` and sent to the
    configured chat model; the Python code blocks found in the reply are
    buffered and handed out one at a time.
    """

    name = "bigcodebench"

    # Upper bound on LLM calls made by a single ``get`` while the buffer is empty.
    max_generation_attempts = 10

    def __init__(
        self,
        config
    ):
        self.config = config
        self.instruct_ds = load_dataset(
            "bigcode/self-oss-instruct-sc2-instructions", split="train", streaming=True
        ).shuffle()
        self.instruct_iterset = iter(self.instruct_ds)

        self.llm = ChatOpenAI(
            base_url=self.config.neuron.model_url,
            model_name=self.config.neuron.model_id,
            api_key=self.config.neuron.vllm_api_key,
            temperature=0.7,
            # max_tokens=12000
        )

        # Code blocks generated but not yet returned.
        self.buffer = []

    def random(
        self,
        **kwargs,
    ):
        """Return the next generated sample; identical to ``get``."""
        return self.get(
            **kwargs,
        )

    def get(
        self,
        **kwargs,
    ):
        """Return one generated code block as a context dict.

        Refills the buffer with up to ``max_generation_attempts`` LLM calls
        when it is empty. Returns ``None`` when no code block could be
        produced, so that the retry loop in ``Dataset.next`` handles it instead
        of an ``IndexError`` escaping from an empty buffer.
        """
        attempts = 0
        while not self.buffer and attempts < self.max_generation_attempts:
            attempts += 1
            row = next(self.instruct_iterset, None)
            if row is None:
                # The stream ran dry: start over rather than raise StopIteration.
                self.instruct_iterset = iter(self.instruct_ds)
                continue
            seed = row["seed"]
            response = self.llm.invoke(DATA_SYNTH_PROMPT + "\n" + seed).content

            # Extract all Python code blocks from the content, including those with a newline after 'python'
            code_blocks = re.findall(r"```python\s*(.*?)```", response, re.DOTALL)

            self.buffer.extend(code_blocks)

        if not self.buffer:
            return None

        content = self.buffer.pop(0)

        return {
            "title": "",
            "topic": "",
            "subtopic": "",
            "content": content,
            "internal_links": [],
            "external_links": [],
            "source": "GitHub",
            "tags": [],
            "extras": {},
        }

    def search(
        self,
        **kwargs,
    ):
        """Not implemented; accepts the keyword arguments ``Dataset.next`` passes and returns None."""
        pass
2164
2165
2166---
2167File: /coding/datasets/pip.py
2168---
2169
2170# The MIT License (MIT)
2171# Copyright © 2024 Yuma Rao
2172# Copyright © 2023 Opentensor Foundation
2173# Copyright © 2024 Macrocosmos
2174
2175# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
2176# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
2177# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
2178# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
2179
2180# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
2181# the Software.
2182
2183# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
2184# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
2185# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
2186# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2187# DEALINGS IN THE SOFTWARE.
2188
2189import io
2190import os
2191import math
2192import random
2193import tarfile
2194import requests
2195
2196from typing import List
2197from pydantic import BaseModel
2198
2199from .base import Dataset
2200from coding.schemas import Context
2201from coding.schemas import File
2202from coding.helpers.selector import Selector
2203
2204
def fetch_pip_repo_contents(repo_name: str, size_limit: int = 10 * 1024 * 1024 ) -> List[File]:
    """
    Fetch the contents of a pip repository as a list of file objects.

    The source distribution (sdist) of the latest release is downloaded and
    read in memory; nothing is written to disk. Members that are not valid
    UTF-8 text are skipped.

    Parameters:
    - repo_name: The name of the pip repository.
    - size_limit: The maximum allowable size of the tarball in bytes.

    Returns:
    - A list of File instances, one per text file in the tarball, with the
      leading top-level directory removed from each path.

    Raises:
    - ValueError if the tarball size exceeds the specified limit, if no source
      distribution exists, if the archive cannot be read as a tarball, or if
      there are issues fetching data.
    """
    request_timeout = 30  # seconds; requests waits indefinitely without one

    # Fetch the latest release metadata from PyPI
    pypi_url = f"https://pypi.org/pypi/{repo_name}/json"
    response = requests.get(pypi_url, timeout=request_timeout)
    if response.status_code != 200:
        raise ValueError(f"Could not fetch repository data for {repo_name}")

    data = response.json()
    latest_version = data["info"]["version"]
    release_files = data["releases"].get(latest_version) or []

    # Pick the sdist explicitly: the last uploaded file may be a wheel, which
    # tarfile cannot open.
    sdists = [entry for entry in release_files if entry.get("packagetype") == "sdist"]
    if not sdists:
        raise ValueError(f"No source distribution found for {repo_name} {latest_version}")
    tarball_url = sdists[-1]["url"]

    # Get the size of the tarball without downloading it
    head_response = requests.head(tarball_url, timeout=request_timeout)
    if head_response.status_code != 200:
        raise ValueError(f"Could not fetch tarball metadata for {repo_name}")

    content_length = int(head_response.headers.get('Content-Length', 0))
    if content_length > size_limit:
        raise ValueError(f"Tarball size ({content_length} bytes) exceeds the limit of {size_limit} bytes")

    # Download the tarball of the latest release
    tarball_response = requests.get(tarball_url, timeout=request_timeout)
    if tarball_response.status_code != 200:
        raise ValueError(f"Could not fetch tarball for {repo_name}")

    # A missing Content-Length header is read as 0 above, so enforce the limit
    # on what was actually received as well.
    payload = tarball_response.content
    if len(payload) > size_limit:
        raise ValueError(f"Tarball size ({len(payload)} bytes) exceeds the limit of {size_limit} bytes")

    # Read the tarball contents
    file_objects = []
    try:
        with tarfile.open(fileobj=io.BytesIO(payload)) as tar:
            for member in tar.getmembers():
                if not member.isfile():
                    continue
                f = tar.extractfile(member)
                if f is None:
                    continue
                try:
                    contents = f.read().decode('utf-8')
                except UnicodeDecodeError:
                    # Binary member: skip it instead of failing the whole fetch.
                    continue
                # split the name to remove the package name
                file_objects.append(File(path='/'.join(member.name.split('/')[1:]), contents=contents))
    except tarfile.TarError as err:
        raise ValueError(f"Could not read tarball for {repo_name}") from err

    return file_objects
2257
def get_pip_repo_size(repo_name: str) -> int:
    """
    Get the size of the latest tarball for a given pip repository.

    The size is read from the Content-Length header of a HEAD request, so the
    file itself is not downloaded. The source distribution of the latest
    release is measured when one exists; otherwise the last listed release
    file is used.

    Parameters:
    - repo_name: The name of the pip repository.

    Returns:
    - The size of the latest tarball in bytes (0 when the server sends no
      Content-Length header).

    Raises:
    - ValueError if the repository data or tarball metadata cannot be fetched,
      or if the latest release has no files.
    """
    request_timeout = 30  # seconds; requests waits indefinitely without one

    # Fetch the latest release metadata from PyPI
    pypi_url = f"https://pypi.org/pypi/{repo_name}/json"
    response = requests.get(pypi_url, timeout=request_timeout)
    if response.status_code != 200:
        raise ValueError(f"Could not fetch repository data for {repo_name}")

    data = response.json()
    latest_version = data["info"]["version"]
    release_files = data["releases"].get(latest_version) or []
    if not release_files:
        # Previously this surfaced as an IndexError/KeyError from the lookup.
        raise ValueError(f"No release files found for {repo_name} {latest_version}")

    sdists = [entry for entry in release_files if entry.get("packagetype") == "sdist"]
    tarball_url = (sdists or release_files)[-1]["url"]

    # Get the size of the tarball without downloading it
    head_response = requests.head(tarball_url, timeout=request_timeout)
    if head_response.status_code != 200:
        raise ValueError(f"Could not fetch tarball metadata for {repo_name}")

    return int(head_response.headers.get('Content-Length', 0))
2290
def get_total_pip_packages():
    """Return the PyPI package count from the ``total`` response header of the libraries.io search API (0 if absent)."""
    # NOTE(security): when LIBRARIES_API_KEY is unset this falls back to a key
    # hard-coded in the source; it should be removed and supplied via the environment.
    api_key = os.getenv('LIBRARIES_API_KEY', '45cc24a495c25a68a052e3f99af9a05a') # TODO remove the api key

    query = {
        "platforms": "pypi",
        "sort": "dependents_count",
        "per_page": 1,  # Get only one result to find out the total count
        "api_key": api_key,
    }

    reply = requests.get("https://libraries.io/api/search", params=query)
    reply.raise_for_status()
    return int(reply.headers.get('total', 0))
2304
def get_random_packages(n=100):
    """
    Return the names of up to ``n`` PyPI packages from a random position in the
    libraries.io search results (sorted by dependents count).

    Parameters:
    - n: Number of packages requested per call (sent as ``per_page``).

    Returns:
    - A list of package names taken from the ``name`` field of each result.

    Raises:
    - requests.HTTPError if the search request fails.
    """
    url = "https://libraries.io/api/search"
    total_packages = get_total_pip_packages()
    total_pages = math.ceil(total_packages / n)

    # Clamp the upper bound: with fewer than ``n`` pages (for example when the
    # total is reported as 0) randint(0, negative) would raise ValueError.
    max_offset = max(0, total_pages - n)
    random_offset = random.randint(0, max_offset)

    # NOTE(review): libraries.io documents `page`/`per_page` for pagination;
    # confirm that the `offset` parameter is honoured by the API.
    params = {
        "platforms": "pypi",
        "sort": "dependents_count",
        "per_page": n,
        "offset": random_offset,
        "api_key": os.getenv('LIBRARIES_API_KEY', '45cc24a495c25a68a052e3f99af9a05a') # TODO remove the api key
    }

    response = requests.get(url, params=params)
    response.raise_for_status()
    data = response.json()

    return [package["name"] for package in data]
2324
2325
2326
2327
class PipDataset(Dataset):
    """Dataset that picks a pip package whose latest tarball is smaller than 10MB."""

    name = "pip"

    def __init__(
        self,
        seed=None,
    ):
        if seed is None:
            seed = random.randint(0, 1000)
        self.seed = seed

    def get(self, n=100, selector: Selector = None):
        """Pick a package from random libraries.io result pages.

        Args:
            n: Number of candidate packages fetched per attempt.
            selector: Callable that chooses one name from the candidate list.
                A default ``Selector()`` is used when None.

        Returns:
            A dict with the package name as ``title`` and ``"pip"`` as ``source``.

        Raises:
            Exception: If 300 attempts produced no package under the size limit.
        """
        if selector is None:
            # Calling None would raise TypeError on the first attempt.
            selector = Selector()

        for _ in range(300):
            packages = get_random_packages(n=n)
            package_name = selector(packages)
            if get_pip_repo_size(package_name) >= 10 * 1024 * 1024: # 10MB
                continue
            # The package files are not fetched here (fetch_pip_repo_contents is not called).
            return dict(
                title = package_name,
                source = "pip",
            )
        raise Exception("Failed to find a valid pip package")

    def search(
        self, query=None, selector: Selector = None,  **kwargs
    ):
        """Not implemented; returns None."""
        pass

    def random(self, n=100, selector: Selector = None, **kwargs):
        """Same as ``get``; ``n`` is forwarded instead of being replaced by 100."""
        return self.get(n=n, selector=selector)
2359
2360
2361
2362---
2363File: /coding/datasets/swe.py
2364---
2365
2366# The MIT License (MIT)
2367# Copyright © 2024 Yuma Rao
2368# Copyright © 2023 Opentensor Foundation
2369# Copyright © 2024 Macrocosmos
2370
2371# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
2372# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
2373# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
2374# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
2375
2376# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
2377# the Software.
2378
2379# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
2380# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
2381# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
2382# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2383# DEALINGS IN THE SOFTWARE.
2384
2385import os
2386import random
2387import requests
2388
2389from fastcore.xtras import obj2dict
2390from swebench.collect.build_dataset import create_instance
2391
2392from .base import Dataset
2393from coding.helpers.selector import Selector
2394from coding.helpers.swebench import Repo as SWERepo
2395
2396
def get_package_stats(package_name: str):
    """
    Look up a package on PyPI and return its name, JSON URL and GitHub link.

    The GitHub link is taken from the ``Source`` project URL when it contains
    "github", otherwise from ``Homepage`` under the same condition.

    Parameters:
    - package_name: The name of the package on PyPI.

    Returns:
    - A dict with the keys ``name``, ``url`` and ``github``.

    Raises:
    - Exception if the PyPI request does not return 200 or no GitHub link is found.
    """
    package_url = f"https://pypi.org/pypi/{package_name}/json"
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.get(package_url, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Failed to get package data from URL: {package_url}")
    payload = response.json()

    # ``project_urls`` may be missing or null; treat both as "no URLs" instead
    # of failing with a TypeError on the membership test.
    project_urls = (payload.get("info") or {}).get("project_urls") or {}

    package_github = None
    for label in ("Source", "Homepage"):
        candidate = project_urls.get(label)
        if candidate and "github" in candidate:
            package_github = candidate
            break
    if not package_github:
        raise Exception(f"No github link found for package: {package_name}")

    return {
        "name": package_name,
        "url": package_url,
        "github": package_github,
    }
2423
2424
def get_top_pip_packages():
    """Return the ``project`` value of every row in the top-pypi-packages 30-day listing."""
    listing_url = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages-30-days.min.json"
    payload = requests.get(listing_url).json()

    names = []
    for row in payload["rows"]:
        names.append(row["project"])
    return names
2431
2432
class SWEBenchDataset(Dataset):
    """Dataset of closed GitHub pull requests that resolve issues, taken from top PyPI packages."""

    name = "swebench"

    def __init__(
        self,
    ):
        pass

    def get(self, n=100, selector: Selector = Selector()) -> dict:
        """Pick a package, find a closed pull request that resolves an issue and return it.

        Args:
            n: Unused; kept for interface compatibility.
            selector: Callable that chooses one package name from the list of
                top packages. A default ``Selector()`` is used when None.

        Returns:
            A dict with the problem statement as ``topic``, ``owner/repo`` as
            ``title``, the patch as ``content`` and the pull number and base
            commit in ``extras``.

        Raises:
            Exception: If GITHUB_TOKEN is not set, the repository is larger
                than 1GB, or no suitable pull request is found.
        """
        if selector is None:
            # ``random`` forwards selector=None by default; calling None would fail.
            selector = Selector()

        package_name = selector(get_top_pip_packages())
        package_info = get_package_stats(package_name)
        token = os.environ.get("GITHUB_TOKEN", None)
        if not token:
            raise Exception("GITHUB_TOKEN not set")

        # Tolerate a trailing slash or ".git" suffix, which would otherwise
        # shift or corrupt the owner/repo components.
        github_url = package_info["github"].rstrip("/")
        owner, repo_name = github_url.split("/")[-2:]
        if repo_name.endswith(".git"):
            repo_name = repo_name[: -len(".git")]

        repo = SWERepo(
            owner,
            repo_name,
            token,
        )

        # Check repo size before proceeding
        if repo.size > 1024 * 1024 * 1024:  # 1GB in bytes
            raise Exception(f"Repository {package_info['github']} is too large (>1GB)")

        valid_pull = None
        err_count = 0
        pulls = list(repo.get_all_pulls(state="closed"))
        random.shuffle(pulls)
        for pull in pulls:
            # Give up once more than five pulls have failed to process.
            if err_count > 5:
                break
            try:
                resolved_issues = repo.extract_resolved_issues(pull)
                setattr(pull, "resolved_issues", resolved_issues)
                if len(resolved_issues) > 0:
                    valid_pull = obj2dict(pull)
                    break
            except Exception:
                # Best effort per pull; unlike a bare ``except`` this lets
                # KeyboardInterrupt and SystemExit propagate.
                err_count += 1

        if not valid_pull:
            raise Exception(f"Could not get a valid SWE pull for {package_info['github']}")
        pull_data = create_instance(repo, valid_pull)
        diff_text = pull_data["patch"]
        return {
            "topic": pull_data["problem_statement"],
            "title": f'{owner}/{repo_name}',
            "content": diff_text,
            "extras": dict(pull_number=pull_data["pull_number"], base_commit=pull_data["base_commit"]),
        }

    def search(self, query, selector: Selector = None, **kwargs):
        """Not implemented; returns None."""
        pass

    def random(self, n=100, selector: Selector = None, **kwargs):
        """Same as ``get``; ``n`` is forwarded instead of being replaced by 100."""
        return self.get(n=n, selector=selector)
2487
2488
2489---
2490File: /coding/datasets/thestack.py
2491---
2492
2493# The MIT License (MIT)
2494# Copyright © 2024 Yuma Rao
2495# Copyright © 2023 Opentensor Foundation
2496# Copyright © 2024 Macrocosmos
2497
2498# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
2499# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
2500# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
2501# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
2502
2503# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
2504# the Software.
2505
2506# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
2507# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
2508# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
2509# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2510# DEALINGS IN THE SOFTWARE.
2511
2512import os
2513import re
2514import boto3
2515import random
2516import itertools
2517import numpy as np
2518from smart_open import open
2519from datasets import load_dataset, Dataset, interleave_datasets
2520
2521from .base import Dataset
2522from coding.schemas import Context
2523from coding.helpers.selector import Selector
2524
# Per-language lookup tables used for comment filtering and keyword/library
# detection. Every entry provides the same four keys:
#   keywords           - reserved words / common tokens of the language
#   libraries          - commonly used libraries or standard headers
#   comments           - prefixes that mark a line as a comment line
#   multiline_comments - (start, end) delimiter pairs of block comments
LANGUAGES = {
    "C++": {
        "keywords": [
            "auto", "break", "case", "char", "const", "continue", "default",
            "do", "double", "else", "enum", "extern", "float", "for", "goto",
            "if", "int", "long", "register", "return", "short", "signed",
            "sizeof", "static", "struct", "switch", "typedef", "union",
            "unsigned", "void", "volatile", "while",
        ],
        "libraries": [
            "iostream", "fstream", "string", "vector", "map", "set",
            "algorithm", "cmath", "cstdio", "cstdlib", "ctime", "cstring",
            "cassert", "cctype", "cerrno", "cfloat", "ciso646", "climits",
            "clocale", "csetjmp", "csignal", "cstdarg", "cstddef", "cwchar",
            "cwctype", "complex", "deque", "exception", "functional",
            "iomanip", "ios", "iosfwd", "istream", "iterator", "limits",
            "list", "locale", "memory", "new", "numeric", "ostream", "queue",
            "sstream", "stack", "stdexcept", "streambuf", "typeinfo",
            "utility", "valarray",
        ],
        "comments": ["//", "/*", "*/"],
        "multiline_comments": [("/*", "*/")],
    },
    "Dockerfile": {
        "keywords": [
            "from", "maintainer", "run", "cmd", "expose", "env", "add",
            "copy", "entrypoint", "volume", "user", "workdir", "onbuild",
        ],
        "libraries": [],
        "comments": ["#"],
        "multiline_comments": [],
    },
    "HTML": {
        "keywords": [
            "div", "span", "input", "ul", "body", "tag", "html", "head",
            "title", "meta", "link", "script", "style", "a", "img", "table",
            "label",
        ],
        "libraries": [],
        "comments": ["<!--", "-->"],
        "multiline_comments": [("<!--", "-->")],
    },
    "Java": {
        "keywords": [
            "abstract", "assert", "boolean", "break", "byte", "case", "catch",
            "char", "class", "continue", "default", "do", "double", "else",
            "enum", "extends", "final", "finally", "float", "for", "if",
            "implements", "import", "instanceof", "int", "interface", "long",
            "native", "new", "package", "private", "protected", "public",
            "return", "short", "static", "strictfp", "super", "switch",
            "synchronized", "this", "throw", "throws", "transient", "try",
            "void", "volatile", "while",
        ],
        "libraries": [
            "java.awt", "java.awt.event", "java.io", "java.lang", "java.math",
            "java.net", "java.text", "java.util", "javax.swing",
        ],
        "comments": ["//", "/*", "*/", "*"],
        "multiline_comments": [("/*", "*/")],
    },
    "JavaScript": {
        "keywords": [
            "abstract", "arguments", "boolean", "break", "byte", "case",
            "catch", "char", "class", "const", "continue", "debugger",
            "default", "delete", "do", "double", "else", "enum", "eval",
            "export", "extends", "false", "final", "finally", "float", "for",
            "function", "goto", "if", "implements", "import", "in",
            "instanceof", "int", "interface", "let", "long", "native",
            # These two were previously fused into "module.exportsnew" by a
            # missing comma (implicit string concatenation).
            "module.exports", "new",
            "null", "package", "private", "protected", "public", "return",
            "short", "static", "super", "switch", "synchronized", "this",
            "throw", "throws", "transient", "true", "try", "typeof", "var",
            "void", "volatile", "while", "with", "yield",
        ],
        "libraries": [
            "react", "express", "mongoose", "axios", "redux", "react-redux",
            "react-router-dom", "react-dom", "react-scripts", "material-ui",
        ],
        "comments": ["//", "/*", "*/"],
        "multiline_comments": [("/*", "*/")],
    },
    "Python": {
        "keywords": [
            "False", "None", "True", "and", "as", "assert", "break", "class",
            "continue", "def", "del", "elif", "else", "except", "finally",
            "for", "from", "global", "if", "import", "in", "is", "lambda",
            "nonlocal", "not", "or", "pass", "raise", "return", "try",
            "while", "with", "yield",
        ],
        "libraries": [
            "numpy", "pandas", "matplotlib", "seaborn", "scipy", "sklearn",
            "tensorflow", "keras", "pytorch", "django", "flask", "requests",
            "bs4", "selenium", "pyautogui", "pyperclip", "pyinputplus",
            "pillow",
        ],
        "comments": ["#"],
        "multiline_comments": [('"""', '"""'), ("'''", "'''")],
    },
    "SQL": {
        "keywords": [
            "add", "all", "alter", "and", "any", "as", "asc", "backup",
            "between", "case", "check", "column", "constraint", "create",
            "database", "default", "delete", "desc", "distinct", "drop",
            "exec", "exists", "foreign", "from", "full", "group", "having",
            "in", "index", "inner", "insert", "into", "is", "join", "key",
            "left", "like", "limit", "not", "null", "on", "or", "order",
            "outer", "primary", "procedure", "right", "rownum", "select",
            "set", "table", "top", "truncate", "union", "unique", "update",
            "values", "view", "where",
        ],
        "libraries": [],
        "comments": ["--", "/*", "*/"],
        # Same block-comment delimiters as the other entries that list "/*".
        "multiline_comments": [("/*", "*/")],
    },
    "Shell": {
        "keywords": [
            "alias", "bg", "bind", "break", "builtin", "caller", "cd",
            "command", "compgen", "complete", "continue", "declare", "dirs",
            "disown", "echo", "enable", "eval", "exec", "exit", "export",
            "false", "fc", "fg", "getopts", "hash", "help", "history", "jobs",
            "kill", "let", "local", "logout", "popd", "printf", "pushd",
            "pwd", "read", "readonly", "return", "set", "shift", "shopt",
            "source", "suspend", "test", "times", "trap", "true", "type",
            "typeset", "ulimit", "umask", "unalias", "unset", "wait",
        ],
        "libraries": [],
        "comments": ["#"],
        "multiline_comments": [(":'", "'")],
    },
}
3001
3002
def convert_to_python3(code: str) -> str:
    """
    Convert Python 2/3 code to Python 3 code.

    Two textual rewrites are applied:
    - ``print <expr>`` statements become ``print(<expr>)`` calls. A ``print``
      that is already followed by an opening parenthesis is left untouched, so
      valid Python 3 calls such as ``print ("a", "b")`` are not re-wrapped into
      a tuple print.
    - the identifier ``xrange`` becomes ``range``.

    The rewrite is regex based, not a parser: text inside strings or comments
    that looks like a print statement is rewritten as well.

    Args:
    - code (str): A string containing Python 2/3 code.

    Returns:
    - str: A string containing Python 3 code.
    """

    def replace_print_statement(match):
        return f"print({match.group(1)})"

    # `print` must be a standalone name (not the tail of `pprint` or an
    # attribute like `obj.print`) followed by a space and something other than
    # an opening parenthesis.
    code = re.sub(
        r"(?<![\w.])print (?![ \t]*\()(.*)",
        replace_print_statement,
        code,
    )

    # Replace xrange with range, matching the whole identifier only so names
    # such as `my_xrange` are preserved.
    code = re.sub(r"\bxrange\b", "range", code)

    return code
3023
3024
def process_repo_row(row):
    """
    Download the content of every file listed in a repository row.

    Each blob is read from ``s3://softwareheritage/content/<blob_id>`` as
    gzip-compressed bytes and decoded with the file's ``src_encoding``.

    Args:
    - row (dict): Row holding a ``files`` list; every entry must provide
      ``blob_id`` and ``src_encoding``.

    Returns:
    - dict: The same row object, with ``content`` set on every file entry.

    Raises:
    - KeyError: If the AWS credentials are missing from the environment while
      there is at least one file to download.
    """
    files = row["files"]
    if not files:
        # Nothing to fetch, so no AWS credentials are needed either.
        return row

    # One session/client serves every blob of the row instead of being rebuilt
    # for each file.
    session = boto3.Session(
        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
    )
    s3 = session.client("s3")

    for file in files:
        blob_id = file["blob_id"]
        src_encoding = file["src_encoding"]
        s3_url = f"s3://softwareheritage/content/{blob_id}"

        with open(
            s3_url, "rb", compression=".gz", transport_params={"client": s3}
        ) as fin:
            file["content"] = fin.read().decode(src_encoding)

    return row
3042
3043
def process_row(row):
    """
    Download a single blob and store its decoded text on the row.

    The blob is read from ``s3://softwareheritage/content/<blob_id>`` as
    gzip-compressed bytes and decoded with the row's ``src_encoding``.

    Args:
    - row (dict): Row providing ``blob_id`` and ``src_encoding``.

    Returns:
    - dict: The same row object, with the decoded text stored under ``code``.
    """
    blob_key = row["blob_id"]
    encoding = row["src_encoding"]

    aws_session = boto3.Session(
        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
    )
    client = aws_session.client("s3")
    location = f"s3://softwareheritage/content/{blob_key}"

    with open(
        location,
        "rb",
        compression=".gz",
        transport_params={"client": client},
    ) as blob:
        text = blob.read().decode(encoding)

    row["code"] = text
    return row
3059
3060
def filter_comments(code, language):
    """
    Strip comments from ``code`` using the markers registered for ``language``.

    Block comments (the language's ``multiline_comments`` pairs, if any) are
    removed first; afterwards every line whose stripped text starts with one of
    the language's ``comments`` markers is dropped.

    Note that every kept line is lower-cased before being returned.

    Args:
    - code (str): Source text to filter.
    - language (str): Key into ``LANGUAGES``.

    Returns:
    - str: The filtered text joined with ``\\n``; ``code`` unchanged when the
      language is not registered.
    """
    spec = LANGUAGES.get(language)
    if spec is None:
        return code

    # Block comments: non-greedy match that is allowed to span lines.
    for opener, closer in spec.get("multiline_comments", ()):
        block_pattern = rf"{re.escape(opener)}.*?{re.escape(closer)}"
        code = re.sub(block_pattern, "", code, flags=re.DOTALL)

    # Line comments: keep only lines that do not begin with a comment marker.
    markers = tuple(spec["comments"])
    kept = [
        line.lower()
        for line in code.splitlines()
        if not line.strip().startswith(markers)
    ]
    return "\n".join(kept)
3085
3086
class TheStackDataset(Dataset):
    """
    Streaming access to code samples from ``bigcode/the-stack-v2`` and to
    repository rows from ``bigcode/the-stack-v2-train-smol-ids``.
    """

    name = "thestack"

    def __init__(
        self,
        seed=None,
        languages=None,
    ):
        """
        Open the streaming datasets and prepare their iterators.

        Args:
        - seed (int | None): Stored on ``self.seed``; a random integer in
          [0, 1000] is drawn when omitted.
        - languages (list[str] | None): Stored on ``self.languages``; defaults
          to every key of ``LANGUAGES``.
        """
        if seed is None:
            seed = random.randint(0, 1000)
        self.seed = seed

        if languages is None:
            languages = list(LANGUAGES.keys())
        self.languages = languages

        # Only Python is streamed at the moment; JavaScript, TypeScript, Go,
        # Java, C++, C, SQL and Shell are currently disabled.
        streamed_languages = ["Python"]

        # shuffle() returns a new dataset, so its result must be kept - a bare
        # `dataset = dataset.shuffle()` inside a for loop would discard it.
        language_datasets = [
            load_dataset(
                "bigcode/the-stack-v2",
                language,
                split="train",
                streaming=True,
            ).shuffle()
            for language in streamed_languages
        ]
        self.stack_dataset = interleave_datasets(language_datasets)
        self.stack_dataset = self.stack_dataset.shuffle()
        self.stack_dataset = self.stack_dataset.map(lambda row: process_row(row))
        self.stack_iterset = iter(self.stack_dataset)

        self.stack_repo_dataset = load_dataset(
            "bigcode/the-stack-v2-train-smol-ids", split="train", streaming=True
        )
        self.stack_repo_dataset = self.stack_repo_dataset.shuffle()
        self.stack_repo_iterset = iter(self.stack_repo_dataset)

    @staticmethod
    def _within_line_limits(text, min_lines, max_lines):
        """Return True when ``text`` has between ``min_lines`` and ``max_lines`` lines (inclusive)."""
        return min_lines <= len(text.splitlines()) <= max_lines

    def random(
        self,
        min_lines=10,
        max_lines=3000,
        selector: Selector = None,
        include_sibling_docs=False,
        min_sibling_docs=1,
        **kwargs,
    ):
        """Return the next sample; thin wrapper around :meth:`get` with a lower ``min_lines`` default."""
        return self.get(
            min_lines,
            max_lines,
            selector,
            include_sibling_docs,
            min_sibling_docs,
            **kwargs,
        )

    def get(
        self,
        min_lines=25,
        max_lines=3000,
        selector: Selector = None,
        include_sibling_docs=False,
        min_sibling_docs=1,
        **kwargs,
    ):
        """
        Pull the next row from the stream and turn it into a sample.

        Args:
        - min_lines (int): Minimum number of lines the content (and every
          sibling document) must have.
        - max_lines (int): Maximum number of lines the content (and every
          sibling document) may have.
        - selector (Selector): Currently unused.
        - include_sibling_docs (bool): When True, draw a repository row, pick
          one file as the content and return the other files as sibling docs.
        - min_sibling_docs (int): Minimum ``num_files`` a repository row needs.

        Returns:
        - dict | None: The sample, or ``None`` when the drawn row does not
          satisfy the size constraints (the caller is expected to retry).
        """
        sibling_docs = []
        if include_sibling_docs:
            row = next(self.stack_repo_iterset)
            if not row["gha_language"]:
                row["gha_language"] = ""
            if (
                row["num_files"] < min_sibling_docs
                or row["num_files"] > 15  # TODO modify this eventually to be different
                or len(row["files"]) < 2
            ):
                return None
            row = process_repo_row(row)
            files = row["files"]
            # Every file, including the first one, may become the main
            # document (randint(1, ...) could never select index 0).
            target_index = random.randrange(len(files))
            # choose all but the random index
            sibling_docs = [
                Context(
                    title=file["path"],
                    content=file["content"],
                    topic=row["gha_language"],
                )
                for position, file in enumerate(files)
                if position != target_index
            ]
            content = files[target_index]["content"]
        else:
            row = next(self.stack_iterset)
            content = row["code"]
            # Reject early so oversized rows are not processed any further.
            if not self._within_line_limits(content, min_lines, max_lines):
                return None

        if ("language" in row and row["language"] == "Python") or (
            "gha_language" in row and row["gha_language"] == "Python"
        ):
            content = convert_to_python3(content)

        if not self._within_line_limits(content, min_lines, max_lines):
            return None

        for sibling_doc in sibling_docs:
            if not self._within_line_limits(sibling_doc.content, min_lines, max_lines):
                return None

        language = row["language"] if "language" in row else row["gha_language"]
        return {
            "title": row["repo_name"],  # name of the repo
            "topic": language,  # language of the code
            "subtopic": "",
            "content": filter_comments(content, language),
            "internal_links": [row["repo_name"]],
            "external_links": [],
            "source": "GitHub",
            "tags": [
                language,
                row["repo_name"],
                "",
            ],
            "extras": {
                "sibling_docs": sibling_docs,
            },
        }

    def search(
        self,
        query,
        column="path",
        min_lines=5,
        max_lines=100,
        selector: Selector = None,
        **kwargs,
    ):
        """
        Return every row of ``self.dataset`` whose ``column`` equals ``query``.

        ``min_lines``, ``max_lines`` and ``selector`` are currently unused.

        Raises:
        - AttributeError: If ``self.dataset`` has not been set. ``__init__``
          only creates the streaming iterators, so it must be assigned
          separately before calling this method.
        """
        dataset = getattr(self, "dataset", None)
        if dataset is None:
            raise AttributeError(
                "TheStackDataset.search requires `self.dataset`, which is not "
                "created by __init__ (only streaming iterators are)."
            )

        mask = np.array(dataset[column]) == query
        filtered_dataset = iter(dataset.select(np.where(mask)[0]))

        results = []
        for row in filtered_dataset:
            content = filter_comments(row["code"], row["language"])
            if row["language"] == "Python":
                content = convert_to_python3(content)
            results.append(
                {
                    "title": row["repo_name"],  # name of the repo
                    "topic": row["language"],  # language of the code
                    "subtopic": row["path"],
                    "content": content,
                    "internal_links": [row["repo_name"], row["path"], row["language"]],
                    "external_links": [],  # TODO complete
                    "source": "GitHub",
                    "tags": [row["language"], row["repo_name"], row["path"]],
                    "extras": {"size": row["size"], "license": row["license"]},
                }
            )
        return results

    def extract_keywords(self, code, language, field):
        """
        Return the entries of ``LANGUAGES[language][field]`` that occur in ``code``.

        Entries are matched as literal text between word boundaries. An
        unknown language or field yields an empty set.
        """
        matches = set()

        # check which keywords and libraries are present in the code
        for keyword in LANGUAGES.get(language, {}).get(field, []):
            # Escape the entry: names such as "module.exports" or "java.awt"
            # contain regex metacharacters.
            if re.search(r"\b" + re.escape(keyword) + r"\b", code):
                matches.add(keyword)

        return matches

    def get_special_contents(self, code, language, remove_comments=True):
        """
        Return ``(present_keywords, present_libraries)`` for ``code``.

        When ``remove_comments`` is True the code is passed through
        ``filter_comments`` before matching.
        """
        if remove_comments:
            code = filter_comments(code, language)

        present_libraries = self.extract_keywords(code, language, "libraries")
        present_keywords = self.extract_keywords(code, language, "keywords")

        return present_keywords, present_libraries
3281
3282
3283
3284---
3285File: /coding/finetune/llm/__init__.py
3286---
3287
3288
3289
3290
3291---
3292File: /coding/finetune/llm/app.py
3293---
3294
3295import os
3296import asyncio
3297from fastapi import FastAPI, HTTPException, Depends
3298from pydantic import BaseModel
3299from typing import Optional, Dict, List
3300from dotenv import load_dotenv
3301
3302# ------------------------------
3303#  LangChain-based LLM Imports
3304# ------------------------------
3305from langchain_anthropic import ChatAnthropic
3306from langchain_google_genai import ChatGoogleGenerativeAI
3307from langchain_openai import ChatOpenAI, OpenAIEmbeddings
3308
3309load_dotenv("../../../.env")
3310
3311if not os.getenv("LLM_AUTH_KEY"):
3312    raise ValueError("LLM_AUTH_KEY environment variable not set")
3313
3314
3315# ------------------------------
3316#      Global Variables
3317# ------------------------------
3318token_usage: Dict[str, int] = {}
3319current_key: Optional[str] = None
3320
3321# FastAPI App
3322app = FastAPI()
3323
3324
3325# ------------------------------
3326#       Pydantic Models
3327# ------------------------------
class InitRequest(BaseModel):
    """Request body for ``POST /init``."""

    # Identifier whose token usage is tracked; it becomes the active key.
    key: str
3330
class LLMRequest(BaseModel):
    """Request body for ``POST /call``."""

    # Prompt text forwarded to the model.
    query: str
    # Name of the registered model to use.
    llm_name: str
3334
class LLMResponse(BaseModel):
    """Response body of ``POST /call``."""

    # Text content returned by the model.
    result: str
    # Token count reported back to the caller.
    total_tokens: int
3338
class EmbeddingRequest(BaseModel):
    """Request body for ``POST /embed``."""

    # Text to embed.
    query: str
3341
class EmbeddingResponse(BaseModel):
    """Response body of ``POST /embed``."""

    # Embedding vector computed for the request's query.
    vector: List[float]
3344
3345
3346# ------------------------------
3347#       Auth Dependency
3348# ------------------------------
def _configured_auth_key():
    """Return the value of the ``LLM_AUTH_KEY`` environment variable (or None)."""
    return os.getenv("LLM_AUTH_KEY")


async def verify_auth(auth_key: str = Depends(_configured_auth_key)):
    """
    Dependency that yields the server-side ``LLM_AUTH_KEY``.

    NOTE(review): this only checks that the server has ``LLM_AUTH_KEY``
    configured. It never reads or compares a credential sent by the caller, so
    endpoints depending on it are effectively unauthenticated. A real check
    (e.g. comparing a request header with ``secrets.compare_digest``) would
    require clients to start sending that credential.

    Raises:
    - HTTPException: 500 when ``LLM_AUTH_KEY`` is not set.
    """
    if auth_key:
        return auth_key
    raise HTTPException(
        status_code=500,
        detail="LLM_AUTH_KEY environment variable not set"
    )
3356
3357
3358# ------------------------------
3359#   Initialize / Reset / Count
3360# ------------------------------
@app.post("/init")
async def init_key(request: InitRequest, auth_key: str = Depends(verify_auth)):
    """Start tracking ``request.key`` (if it is new) and make it the active key."""
    global current_key
    # An already known key keeps its accumulated count.
    token_usage.setdefault(request.key, 0)
    current_key = request.key
    return {"message": f"Set active key to {request.key}"}
3369
@app.post("/reset")
async def reset_count(auth_key: str = Depends(verify_auth)):
    """Set the token count of the active key back to zero; 400 when no key is active."""
    if current_key:
        token_usage[current_key] = 0
        return {"message": f"Reset token count for key {current_key}"}
    raise HTTPException(
        status_code=400,
        detail="No active key. Call /init endpoint first."
    )
3381
@app.get("/count")
async def get_count(auth_key: str = Depends(verify_auth)):
    """Report the active key together with its token count; 400 when no key is active."""
    if current_key:
        return {"key": current_key, "count": token_usage[current_key]}
    raise HTTPException(
        status_code=400,
        detail="No active key. Call /init endpoint first."
    )
3392
3393
3394# ------------------------------
3395#   Helper: Async LLM Invoker
3396# ------------------------------
async def ainvoke_with_retry(
    llm,
    query: str,
    max_retries: int = 50,
    initial_delay: int = 1,
    max_delay: float = 60.0,
):
    """
    Invoke the LLM asynchronously, retrying with capped exponential backoff.

    An error is retried only when its text contains "429" or "529"
    (rate-limit / overloaded); any other error is re-raised immediately.

    Args:
    - llm: Object exposing an awaitable ``ainvoke(query)``.
    - query (str): Prompt passed to ``llm.ainvoke``.
    - max_retries (int): Maximum number of attempts.
    - initial_delay (int): Seconds to wait after the first retryable failure.
    - max_delay (float): Upper bound for a single wait. Without a cap the
      doubling delay would grow to practically infinite sleeps long before 50
      attempts are used up.

    Returns:
    - The value returned by ``llm.ainvoke``.

    Raises:
    - Exception: The LLM's own error, once it is non-retryable or the attempts
      are exhausted.
    - HTTPException: 500 when ``max_retries`` allows no attempt at all.
    """
    delay = initial_delay

    for attempt in range(max_retries):
        try:
            return await llm.ainvoke(query)
        except Exception as e:
            # Check if it's a rate-limit or server error
            retryable = "429" in str(e) or "529" in str(e)
            if not retryable or attempt == max_retries - 1:
                # Some other error, or retries exhausted - propagate as is.
                raise

        # Exponential backoff, bounded by max_delay.
        await asyncio.sleep(min(delay, max_delay))
        delay = min(delay * 2, max_delay)

    # Only reachable when the loop never ran (max_retries <= 0).
    raise HTTPException(status_code=500, detail="Unknown error invoking LLM")
3429
3430
3431# ------------------------------
3432#          Call LLM
3433# ------------------------------
@app.post("/call", response_model=LLMResponse)
async def call_llm(request: LLMRequest):
    """
    Call one of the registered LLMs. If repeated failures, fallback to 'gpt-4o'.

    An unknown ``llm_name`` is served by 'gpt-4o' as well. The token usage of
    the active key is increased by the tokens reported for this call, and the
    accumulated total is returned as ``total_tokens``.

    Raises:
    - HTTPException: 500 carrying the error text when both the requested and
      the fallback model fail, or when any other step raises.
    """
    global current_key, token_usage

    # Factories instead of instances: only the model that is actually used gets
    # constructed, so a request no longer builds (and needs credentials for)
    # all five provider clients.
    model_factories = {
        "gpt-4o": lambda: ChatOpenAI(model="gpt-4o", max_tokens=16384),
        # NOTE(review): 16384 may exceed gpt-3.5-turbo's output limit - confirm.
        "gpt-3.5-turbo": lambda: ChatOpenAI(model="gpt-3.5-turbo", max_tokens=16384),
        "gpt-4o-mini": lambda: ChatOpenAI(model="gpt-4o-mini", max_tokens=16384),
        # 8192, not 8912: the previous value was a digit transposition and lies
        # above the 8192-token output limit of these models.
        "claude-3-5-sonnet": lambda: ChatAnthropic(model="claude-3-5-sonnet-latest", max_tokens=8192),
        "gemini-2.0-flash-exp": lambda: ChatGoogleGenerativeAI(model="gemini-2.0-flash-exp", max_tokens=8192),
    }

    try:
        if not current_key:
            # If no key was initialized, default to "test" so code doesn't break
            current_key = "test"
            token_usage[current_key] = 0

        # Try to retrieve requested model; fallback to "gpt-4o" if not found
        build_fallback = model_factories["gpt-4o"]
        build_requested = model_factories.get(request.llm_name, build_fallback)

        # --- Step 1: Try the requested LLM ---
        try:
            response = await ainvoke_with_retry(build_requested(), request.query)
        except Exception:
            # If the requested LLM cannot be built or fails after max retries, fallback
            response = await ainvoke_with_retry(build_fallback(), request.query)

        # Extract tokens from usage metadata (some LLMs may not provide it, in
        # which case the attribute is missing or None).
        usage = getattr(response, "usage_metadata", None) or {}
        tokens = usage.get("total_tokens", 0)

        # Update token usage
        token_usage[current_key] += tokens

        return LLMResponse(
            result=response.content,
            total_tokens=token_usage[current_key]
        )
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
3477
3478
3479# ------------------------------
3480#        Embeddings
3481# ------------------------------
@app.post("/embed", response_model=EmbeddingResponse)
async def get_embeddings(request: EmbeddingRequest):
    """
    Returns embeddings vector for the given input query.

    Args:
        request: Request carrying the ``query`` text to embed.

    Returns:
        EmbeddingResponse wrapping the vector produced by the
        "text-embedding-3-small" model.

    Raises:
        HTTPException: Status 500 carrying the error text if embedding fails.
    """
    embedder = OpenAIEmbeddings(model="text-embedding-3-small")
    try:
        # Use the async variant so the event loop is not blocked while the
        # embedding request is in flight.
        vector = await embedder.aembed_query(request.query)
        return EmbeddingResponse(vector=vector)
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
3494
3495
3496# ------------------------------
3497#      Run via Uvicorn
3498# ------------------------------
if __name__ == "__main__":
    # Serve the API directly when this module is executed as a script.
    import uvicorn
    # Listens on all interfaces, port 25000 (the port the LLM clients default to).
    uvicorn.run(app, host="0.0.0.0", port=25000)
3502
3503
3504
3505---
3506File: /coding/finetune/llm/client.py
3507---
3508
3509import os
3510import requests
3511
class LLMClient:
    """Small HTTP client for the LLM API server's ``/call`` and ``/embed`` routes."""

    def __init__(self, base_url: str = f"http://{os.getenv('DOCKER_HOST_IP', 'localhost')}:25000"):
        """
        Remember the API server URL, with trailing slashes removed.

        The default URL is computed once, when the class is defined, from the
        DOCKER_HOST_IP environment variable (falling back to localhost).
        """
        trimmed = base_url.rstrip("/")
        self.base_url = trimmed

    def __call__(self, query: str, llm_name: str) -> tuple[str, int]:
        """
        Send a prompt to the ``/call`` endpoint.

        Args:
            query (str): The prompt/query to send to the LLM
            llm_name (str): Name of the LLM model the server should use

        Returns:
            tuple[str, int]: The ``result`` and ``total_tokens`` fields of the
            JSON reply (generated text, total tokens used for this key)

        Raises:
            requests.exceptions.RequestException: If API call fails
        """
        reply = requests.post(
            self.base_url + "/call",
            json={"query": query, "llm_name": llm_name},
        )
        reply.raise_for_status()

        body = reply.json()
        text, total = body["result"], body["total_tokens"]
        return text, total

    def embed(self, query: str) -> list[float]:
        """
        Fetch the embedding of a text from the ``/embed`` endpoint.

        Args:
            query (str): The text to get embeddings for

        Returns:
            list[float]: The ``vector`` field of the JSON reply

        Raises:
            requests.exceptions.RequestException: If API call fails
        """
        reply = requests.post(self.base_url + "/embed", json={"query": query})
        reply.raise_for_status()
        return reply.json()["vector"]
3559
3560
3561---
3562File: /coding/finetune/llm/manager.py
3563---
3564
3565import os
3566import requests
3567from typing import Optional, Dict, Any
3568from urllib.parse import urljoin
3569
class LLMManager:

    """Manager for interacting with LLM API endpoints"""

    def __init__(self, base_url: str = "http://localhost:25000"):
        """
        Initialize LLM manager

        Args:
            base_url: Base URL of LLM API server (trailing slashes are removed)

        Raises:
            ValueError: If LLM_AUTH_KEY environment variable is not set
        """
        self.base_url = base_url.rstrip('/')
        self.auth_key = os.getenv("LLM_AUTH_KEY")
        if not self.auth_key:
            raise ValueError("LLM_AUTH_KEY environment variable not set")
        # Last key passed to init_key(); None until init_key() succeeds.
        self.current_key: Optional[str] = None

    def _make_request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]:
        """
        Make HTTP request to API endpoint

        The auth key is sent as the Authorization header on every request.

        Args:
            method: HTTP method (get, post, etc)
            endpoint: API endpoint path
            **kwargs: Additional arguments passed to requests

        Returns:
            Dict containing API response

        Raises:
            requests.exceptions.RequestException: If request fails
        """
        url = urljoin(f"{self.base_url}/", endpoint.lstrip('/'))
        # Copy the caller's headers so adding Authorization never mutates
        # (or leaks the key into) a dict the caller still owns.
        headers = dict(kwargs.pop('headers', None) or {})
        headers['Authorization'] = self.auth_key

        response = requests.request(
            method,
            url,
            headers=headers,
            **kwargs
        )
        response.raise_for_status()
        return response.json()

    def init_key(self, key: str) -> Dict[str, str]:
        """
        Initialize token tracking for a key

        Args:
            key: Key to initialize

        Returns:
            Dict containing initialization status
        """
        result = self._make_request(
            'post',
            'init',
            json={'key': key}
        )
        # Only remember the key once the server accepted it.
        self.current_key = key
        return result

    def reset_count(self) -> Dict[str, str]:
        """
        Reset token count for current key

        Returns:
            Dict containing reset status
        """
        return self._make_request('post', 'reset')

    def get_count(self) -> Dict[str, Any]:
        """
        Get current token count

        Returns:
            Dict containing current key and count
        """
        return self._make_request('get', 'count')
3653
3654
3655
3656
3657---
3658File: /coding/finetune/swe-server/runner.py
3659---
3660
3661import os
3662import submission
3663
# Single solver instance, created at import time and reused by run_swe().
swe_instance = submission.SWE()


def run_swe(repo_location, issue_description):
    """Run the submitted SWE solver on a repository and return what it produces."""
    patch = swe_instance(repo_location, issue_description)
    return patch


if __name__ == "__main__":
    # The issue text comes from the ISSUE_DESCRIPTION environment variable;
    # the repository is expected at /app/repo.
    repo_location = "/app/repo"
    issue_description = os.getenv("ISSUE_DESCRIPTION")
    result = run_swe(repo_location, issue_description)
    # Printed with a "Patch: " prefix on a single print call.
    print("Patch: ", result.model_dump())
3674
3675
3676---
3677File: /coding/finetune/swe-server/server.py
3678---
3679
3680from fastapi import FastAPI, HTTPException
3681from pydantic import BaseModel
3682import submission
3683
app = FastAPI()

# Instantiate the SWE class from submission.py once, at import time

swe_instance = submission.SWE()
3689
class CallRequest(BaseModel):
    """Request body of the POST /call endpoint."""
    # Passed as the first argument to the SWE instance.
    repo_location: str
    # Passed as the second argument to the SWE instance.
    issue_description: str
3693
@app.post("/call")
async def call_swe(request: CallRequest) -> dict:
    """Run the SWE instance on the request and return its dumped result.

    Any exception raised while running or dumping is reported as an HTTP 500
    whose detail is the exception text.
    """
    try:
        patch = swe_instance(request.repo_location, request.issue_description)
        payload = {"result": patch.model_dump()}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return payload
3702
3703
if __name__ == "__main__":
    # Serve the SWE API directly when this module is executed as a script.
    import uvicorn

    # Listens on all interfaces, port 3000.
    uvicorn.run(app, host="0.0.0.0", port=3000)
3708
3709
3710
3711---
3712File: /coding/finetune/swe-server/swebase.py
3713---
3714
3715import os
3716import requests
3717from pydantic import BaseModel
3718from abc import ABC, abstractmethod
3719
class Edit(BaseModel):
    """A single line replacement within one file."""
    # File the edit applies to.
    file_name: str
    # Line number of the edited line (NOTE(review): 0- vs 1-based not visible here — confirm).
    line_number: int
    # Presumably the existing content of that line — verify against the consumer.
    line_content: str
    # Replacement content for that line.
    new_line_content: str
3725
class Patch(BaseModel):
    """A collection of Edit objects."""
    edits: list[Edit]
3728
# if host ip is localhost itll fail, need to get docker host ip
class LLMClient:
    """Small HTTP client for the LLM API server's ``/call`` and ``/embed`` routes."""

    def __init__(self, base_url: str = f"http://{os.getenv('HOST_IP', 'localhost')}:25000"):
        """
        Remember the API server URL, with trailing slashes removed.

        The default URL is computed once, when the class is defined, from the
        HOST_IP environment variable (falling back to localhost).
        """
        trimmed = base_url.rstrip("/")
        self.base_url = trimmed

    def __call__(self, query: str, llm_name: str) -> tuple[str, int]:
        """
        Send a prompt to the ``/call`` endpoint.

        Args:
            query (str): The prompt/query to send to the LLM
            llm_name (str): Name of the LLM model the server should use

        Returns:
            tuple[str, int]: The ``result`` and ``total_tokens`` fields of the
            JSON reply (generated text, total tokens used for this key)

        Raises:
            requests.exceptions.RequestException: If API call fails
        """
        reply = requests.post(
            self.base_url + "/call",
            json={"query": query, "llm_name": llm_name},
        )
        reply.raise_for_status()

        body = reply.json()
        text, total = body["result"], body["total_tokens"]
        return text, total

    def embed(self, query: str) -> list[float]:
        """
        Fetch the embedding of a text from the ``/embed`` endpoint.

        Args:
            query (str): The text to get embeddings for

        Returns:
            list[float]: The ``vector`` field of the JSON reply

        Raises:
            requests.exceptions.RequestException: If API call fails
        """
        reply = requests.post(self.base_url + "/embed", json={"query": query})
        reply.raise_for_status()
        return reply.json()["vector"]
3777
class SWEBase(ABC):
    """Abstract base for SWE solvers; every instance gets an LLMClient as ``self.llm``."""

    def __init__(self):
        self.llm = LLMClient()

    @abstractmethod
    def __call__(self, repo_location: str, issue_description: str) -> Patch:
        """Subclasses take a repository location and an issue description and return a Patch."""
3785
3786
3787
3788---
3789File: /coding/finetune/__init__.py
3790---
3791
3792from .pipeline import FinetunePipeline
3793
# Names of the modules/packages on the allow-list.
# NOTE(review): entries mix import names ("ast", "json") with distribution-style
# names ("tree-sitter", "llama-index"); "sentencetransformers" has no separator,
# unlike the others — confirm which spelling the consumer expects.
ALLOWED_MODULES = [
    "ast",
    "sentencetransformers",
    "networkx",
    "grep-ast",
    "tree-sitter",
    "tree-sitter-languages",
    "rapidfuzz",
    "llama-index",
    "pydantic",
    "numpy",
    "ruamel.yaml",
    "json"
]
3808
3809
3810---
3811File: /coding/finetune/dockerutil.py
3812---
3813
3814import os
3815import ast
3816import json
3817import docker
3818import tempfile
3819import threading
3820from pathlib import Path
3821
3822from coding.constants import COMPETITION_ID
3823from ..helpers.git import GitRepo
3824
def exec_container_with_timeout(container, command, timeout):
    """
    Executes a command in a Docker container with a timeout.

    The command runs in a worker thread; if it has not finished after
    ``timeout`` seconds the container is killed.

    Args:
        container: The Docker container object.
        command: The command to execute.
        timeout: Timeout in seconds.

    Returns:
        Tuple of exec result and logs, as unpacked from ``container.exec_run``.

    Raises:
        TimeoutError: If the command takes longer than the timeout.
        RuntimeError: If the container could not be killed after a timeout.
        Exception: Whatever ``container.exec_run`` raised, re-raised unchanged.
    """
    exec_result = None
    logs = None
    exception = None

    def target():
        nonlocal exec_result, logs, exception
        try:
            exec_result, logs = container.exec_run(command)
        except Exception as e:
            exception = e

    # Daemon thread: a command that never returns must not keep the
    # interpreter alive after the timeout has been reported.
    thread = threading.Thread(target=target, daemon=True)
    thread.start()
    thread.join(timeout)

    if thread.is_alive():
        # Kill the container if the timeout is exceeded
        try:
            container.kill()
        except Exception as kill_exception:
            raise RuntimeError(
                f"Failed to kill the container after timeout: {kill_exception}"
            ) from kill_exception

        raise TimeoutError(
            f"The command '{command}' exceeded the timeout of {timeout} seconds and the container was killed."
        )

    if exception is not None:
        raise exception

    return exec_result, logs
3872
def build_docker_container(logic_files: dict, hotkey: str, repo_files: dict) -> str:
    """
    Builds a Docker image for evaluating model logic.

    The build context is a temporary directory holding the logic files, the
    repo files under ``repo/`` and everything from the ``swe-server`` folder
    next to this module.

    Args:
        logic_files (dict): Dictionary mapping filenames to file contents
        hotkey (str): Unique identifier for the logic (used in the image tag)
        repo_files (dict): Dictionary mapping filenames to file contents to copy to repo

    Returns:
        str: ID of the built image

    Raises:
        docker.errors.BuildError: If the image build fails
        docker.errors.APIError: If the Docker daemon reports an error
    """
    import shutil  # local import: only needed to copy the server template

    # Initialize Docker client
    client = docker.from_env()

    # Create temporary directory to store files
    with tempfile.TemporaryDirectory() as temp_dir:
        # NOTE(review): filenames are joined verbatim, so a name containing
        # ".." or an absolute path would be written outside temp_dir —
        # confirm callers sanitize them.
        # Write logic files to temp directory
        for filename, content in logic_files.items():
            file_path = os.path.join(temp_dir, filename)
            # Create all parent directories
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            # Create the file and write content
            with open(file_path, "w", encoding="latin-1") as f:
                f.write(content)

        # Write repo files to repo path
        for filename, content in repo_files.items():
            file_path = os.path.join(temp_dir, "repo", filename)
            # Create all parent directories
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            # Create the file and write content
            with open(file_path, "w", encoding="latin-1") as f:
                f.write(content)

        # Copy Dockerfile and server files. shutil is used instead of a shell
        # "cp -r" so paths with spaces work and copy failures raise.
        swe_server_path = Path(__file__).parent / "swe-server"
        for item in swe_server_path.glob("*"):
            dest_path = os.path.join(temp_dir, item.name)
            if item.is_file():
                shutil.copyfile(item, dest_path)
            elif item.is_dir():
                shutil.copytree(item, dest_path, dirs_exist_ok=True)

        # Build the container
        try:
            image, logs = client.images.build(
                path=temp_dir, tag=f"swe-logic-{str(hotkey)}-{COMPETITION_ID}".lower(), rm=True
            )
            return image.id

        except docker.errors.BuildError as e:
            print(f"Error building container: {str(e)}")
            raise
        except docker.errors.APIError as e:
            print(f"Docker API error: {str(e)}")
            raise
3933
def run_docker_container(
    image_id: str, repo: GitRepo, hotkey: str, issue_description: str
) -> dict:
    """
    Runs a Docker container for evaluating model logic.

    The container is started, awaited, and its logs are scanned (from the
    end) for a line starting with ``Patch:``; the remainder of that line is
    parsed as JSON, falling back to a Python literal.

    Args:
        image_id (str): ID of the Docker image to run
        repo (GitRepo): Git repository object containing code to evaluate
            (not used by this function)
        hotkey (str): Unique identifier for the logic
        issue_description (str): Description of the issue to fix

    Returns:
        dict: The patch output from the container

    Raises:
        RuntimeError: If the logs contain no ``Patch:`` line
        docker.errors.APIError: If the Docker daemon reports an error
    """
    # Initialize Docker client
    client = docker.from_env()

    container_name = f"swe-logic-{str(hotkey)}-{COMPETITION_ID}".lower()
    container = None

    try:
        # Remove any existing container with the same name
        try:
            existing = client.containers.get(container_name)
            existing.remove(force=True)
        except docker.errors.NotFound:
            pass

        container = client.containers.create(
            image=image_id,
            name=container_name,
            detach=True,
            ports={"3000/tcp": 3000},
            extra_hosts={"host.docker.internal": "host-gateway"},
            environment={"HOST_IP": os.getenv("HOST_IP", "localhost"), "ISSUE_DESCRIPTION": issue_description},
        )

        # Start the container
        container.start()

        # Wait for container to finish and get logs
        container.wait()
        logs = container.logs().decode('utf-8')
        print("===== CONTAINER LOGS =====")
        print(logs)
        print("===== CONTAINER LOGS =====")

        # Parse the patch from the logs (last matching line wins)
        patch_line = next(
            (line for line in reversed(logs.split('\n')) if line.startswith('Patch:')),
            None,
        )
        if patch_line is None:
            raise RuntimeError("No 'Patch:' line found in container logs")
        # Strip only the leading marker; "Patch:" inside the payload is data.
        payload = patch_line[len('Patch:'):].strip()
        try:
            # First try parsing as JSON
            patch_dict = json.loads(payload)
        except json.JSONDecodeError:
            # Fall back to safely evaluating as literal Python dict
            patch_dict = ast.literal_eval(payload)

        return patch_dict

    except docker.errors.APIError as e:
        print(f"Docker API error: {str(e)}")
        raise

    finally:
        # Best-effort cleanup on every exit path so a failed run does not
        # leave the named container (and its port binding) behind.
        if container is not None:
            try:
                container.stop(timeout=1)
            except Exception:
                pass
            try:
                container.remove(force=True)
            except Exception:
                pass
4004    
4005
def run_docker_container_from_base(
    container_name: str, repo: GitRepo, hotkey: str, issue_description: str, logic_files: dict
) -> dict:
    """
    Runs the logic inside a container created from the prebuilt base image.

    Logic files and the ``swe-server`` files are staged under ``code/``, the
    repository files under ``repo/``; both are copied into ``/app/`` of a
    fresh container, where ``/app/code/runner.py`` is executed with a
    600-second timeout. The last log line starting with ``Patch:`` is parsed
    as JSON, falling back to a Python literal.

    Args:
        container_name (str): Name of the Docker container to run
        repo (GitRepo): Git repository object containing code to evaluate
        hotkey (str): Unique identifier for the logic (not used by this function)
        issue_description (str): Description of the issue to fix
        logic_files (dict): Dictionary mapping filenames to file contents

    Returns:
        dict: The patch output from the container

    Raises:
        RuntimeError: If the runner output contains no ``Patch:`` line
        subprocess.CalledProcessError: If copying files into the container fails
        TimeoutError: If the runner exceeds the timeout
        docker.errors.APIError: If the Docker daemon reports an error
    """
    import shutil  # local imports: only this function needs them
    import subprocess

    # Initialize Docker client
    client = docker.from_env()
    with tempfile.TemporaryDirectory() as temp_dir:
        code_dir = os.path.join(temp_dir, "code")
        os.makedirs(code_dir)

        # NOTE(review): filenames are joined verbatim, so a name containing
        # ".." or an absolute path would be written outside the staging
        # directory — confirm callers sanitize them.
        # Write logic files to code directory
        for filename, content in logic_files.items():
            file_path = os.path.join(code_dir, filename)
            # Create all parent directories
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            # Create the file and write content
            with open(file_path, "w", encoding="latin-1") as f:
                f.write(content)

        # Write repo files to repo path
        for filename, content in repo.files.items():
            file_path = os.path.join(temp_dir, "repo", filename)
            # Create all parent directories
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            # Create the file and write content
            with open(file_path, "w", encoding="latin-1") as f:
                f.write(content)

        # Copy Dockerfile and server files after the logic files, so a server
        # file replaces a logic file of the same name. shutil is used instead
        # of a shell "cp -r" so paths with spaces work and failures raise.
        swe_server_path = Path(__file__).parent / "swe-server"
        for item in swe_server_path.glob("*"):
            dest_path = os.path.join(code_dir, item.name)
            if item.is_file():
                shutil.copyfile(item, dest_path)
            elif item.is_dir():
                shutil.copytree(item, dest_path, dirs_exist_ok=True)

        container = None
        try:
            # Remove any existing container with the same name
            try:
                existing = client.containers.get(container_name)
                existing.remove(force=True)
            except docker.errors.NotFound:
                pass

            container = client.containers.create(
                image="brokespace/swe-server:latest",
                name=container_name,
                detach=True,
                extra_hosts={"host.docker.internal": "host-gateway"},
                environment={"HOST_IP": os.getenv("HOST_IP", "localhost"), "ISSUE_DESCRIPTION": issue_description},
                command="sleep infinity"
            )

            # Start the container
            container.start()

            # Copy files from temp_dir into container. An argument list (no
            # shell) keeps odd characters in names from being interpreted,
            # and check=True surfaces a failed copy immediately.
            subprocess.run(
                ["docker", "cp", f"{temp_dir}/.", f"{container_name}:/app/"],
                check=True,
            )

            # Execute runner.py in container
            exec_result, logs = exec_container_with_timeout(container, "python3 -u /app/code/runner.py", 600)
            logs = logs.decode('utf-8')

            # Parse the patch from the logs (last matching line wins)
            patch_line = next(
                (line for line in reversed(logs.split('\n')) if line.startswith('Patch:')),
                None,
            )
            if patch_line is None:
                raise RuntimeError("No 'Patch:' line found in runner output")
            # Strip only the leading marker; "Patch:" inside the payload is data.
            payload = patch_line[len('Patch:'):].strip()
            try:
                # First try parsing as JSON
                patch_dict = json.loads(payload)
            except json.JSONDecodeError:
                # Fall back to safely evaluating as literal Python dict
                patch_dict = ast.literal_eval(payload)

            return patch_dict

        except docker.errors.APIError as e:
            print(f"Docker API error: {str(e)}")
            raise

        finally:
            # Best-effort cleanup; container is None when creation failed.
            if container is not None:
                try:
                    container.stop(timeout=1)
                except Exception:
                    pass

                try:
                    container.remove(force=True)
                except Exception:
                    pass
4110
4111
4112
4113---
4114File: /coding/finetune/evaluate.py
4115---
4116
4117from transformers import AutoTokenizer, AutoModelForCausalLM
4118
4119
def evaluate(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    renderer: callable,
    query: str,
) -> str:
    """Generate a reply to ``query`` and return it with the prompt and special tokens removed.

    Args:
        model: Model whose ``generate`` is called with up to 4096 new tokens.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        renderer: Callable turning a list of chat messages into the prompt string.
        query: User message to answer.

    Returns:
        The decoded output after the first ``len(prompt)`` characters, with
        every special token string removed and whitespace stripped.
    """
    messages = [{"role": "user", "content": query}]
    # Render once so the text that is tokenized is the same text whose length
    # is used to cut the prompt off the decoded output.
    prompt = renderer(messages)
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=4096)
    response = tokenizer.decode(outputs[0], skip_special_tokens=False)
    # NOTE(review): slicing by len(prompt) assumes the decoded text starts
    # with the prompt verbatim — confirm for tokenizers that prepend tokens.
    response = response[len(prompt):].strip()
    for token in tokenizer.all_special_tokens:
        response = response.replace(token, "")
    return response.strip()
4136
4137
4138
4139---
4140File: /coding/finetune/model.py
4141---
4142
4143import os
4144import time
4145import shutil
4146import psutil
4147import random
4148import asyncio
4149import requests
4150from tqdm import tqdm   
4151import bittensor as bt
4152from transformers import AutoConfig
4153from langchain_openai import ChatOpenAI
4154from sglang.utils import terminate_process
4155from coding.utils.shell import execute_shell_command
4156
# Hugging Face hub cache directory; "~" is expanded where the path is used.
MODEL_DIR = "~/.cache/huggingface/hub"
4158
def is_phi_model(model_name: str):
    """Return True when the model's config reports a model type containing "phi3"."""
    model_type = AutoConfig.from_pretrained(model_name).model_type
    return "phi3" in model_type.lower()
4162
4163
# Delete the model from the huggingface cache when we're done serving it so we don't run out of disk space
def delete_model_from_hf_cache(model_name: str):
    """Remove the model's ``models--<org>--<name>`` folder from the HF cache.

    Deletion errors are logged, not raised; a missing folder only produces a
    debug message.
    """
    folder_name = f"models--{model_name.replace('/', '--')}"
    target = os.path.join(os.path.expanduser(MODEL_DIR), folder_name)

    # Nothing cached under that name: report and stop.
    if not os.path.exists(target):
        bt.logging.debug(f"Finetune: Model not found in the cache, could not delete")
        return

    try:
        shutil.rmtree(target)
        bt.logging.debug(f"Finetune: Model has been removed from the HF cache.")
    except Exception as exc:
        bt.logging.error(f"Finetune: Error deleting model: from HF cache: {exc}")
4181
def wait_for_server(base_url: str, server_process, timeout: int = None) -> None:
    """Wait for the server to be ready by polling the /v1/models endpoint.

    Args:
        base_url: The base URL of the server
        server_process: The server's process handle; it is watched so the
            wait aborts as soon as the process dies
        timeout: Maximum time to wait in seconds. None means wait forever.

    Raises:
        TimeoutError: If the server is not ready within ``timeout`` seconds.
        Exception: If the server process exits while waiting.
    """
    start_time = time.time()
    procutil = psutil.Process(int(server_process.pid))
    while True:
        if timeout and time.time() - start_time > timeout:
            bt.logging.error(f"Finetune: Server did not become ready within timeout period")
            raise TimeoutError("Server did not become ready within timeout period")

        # Use psutil to monitor the process
        if not procutil.is_running():  # Check if process is still running
            bt.logging.error(f"Finetune: Server process terminated unexpectedly, check VRAM usage")
            raise Exception("Server process terminated unexpectedly, potentially VRAM usage issue")
        exit_code = server_process.poll()
        if exit_code is not None:
            bt.logging.error(f"Finetune: Server process terminated with code {exit_code}")
            raise Exception(f"Server process terminated with code {exit_code}")

        try:
            # Bound each probe so a hung connection cannot outlive the
            # overall timeout checked at the top of the loop.
            response = requests.get(
                f"{base_url}/v1/models",
                headers={"Authorization": "Bearer None"},
                timeout=10,
            )
            if response.status_code == 200:
                # Short grace period after the first successful probe.
                time.sleep(5)
                return
        except requests.exceptions.RequestException:
            pass

        # Pause between probes, including after a non-200 answer, so the
        # loop never spins at full speed.
        time.sleep(1)
4216
4217
class ModelServer:
    """Runs an sglang server for one model and talks to it through ChatOpenAI.

    The server is launched from ``__init__`` on a random local port. Use the
    instance as a context manager, or call ``cleanup()``, to stop the server
    and remove the model from the Hugging Face cache.
    """

    def __init__(self, model_name: str):
        self.model_path = f"{model_name}"
        self.model_name = model_name
        # random port between 12000 and 15999
        self.port = random.randint(12000, 15999)
        self.server_process = None
        self.start_server()

    def invoke(self, messages: list[dict]):
        """Send one conversation to the server and return the reply text."""
        return self.llm.invoke(messages).content

    async def ainvoke(self, messages: list[dict]):
        """Async variant of ``invoke``."""
        response = await self.llm.ainvoke(messages)
        return response.content

    async def _invoke_batch_async(self, message_batches, batch_size=10):
        """Async function to process all batches."""
        results = []
        for i in tqdm(range(0, len(message_batches), batch_size), desc="Processing batches"):
            batch = message_batches[i : i + batch_size]
            # Schedule all tasks in this batch concurrently
            tasks = [self.llm.ainvoke(messages) for messages in batch]
            # Wait for them all
            responses = await asyncio.gather(*tasks)
            # Collect results
            results.extend(response.content for response in responses)
        return results

    def invoke_batch(self, message_batches, batch_size=10):
        """Run ``batch_size`` conversations at a time and return all reply texts in order."""
        return asyncio.run(self._invoke_batch_async(message_batches, batch_size))

    def _launch_server(self, *, quantize: bool, use_triton: bool):
        """Start an sglang server process with the chosen flag combination."""
        # Built as one line: the earlier multi-line literals contained
        # "\ " sequences, which are invalid string escapes.
        parts = [
            f"{os.getcwd()}/.venvsglang/bin/python -m sglang.launch_server",
            f"--model {self.model_name}",
            f"--model-path {self.model_path}",
            f"--port {self.port}",
            "--host 0.0.0.0",
        ]
        if quantize:
            parts.append("--quantization fp8")
        parts.append("--mem-fraction-static 0.6")
        parts.append("--context-length 8096")
        parts.append("--attention-backend triton" if use_triton else "--disable-cuda-graph")
        return execute_shell_command(" ".join(parts), self.model_name)

    def start_server(self):
        """Launch the server, retrying with progressively different flags.

        Attempts, each waited on for up to 15 minutes:
          1. fp8 quantization (triton attention backend for phi3 models,
             cuda graph disabled otherwise),
          2. the same without quantization,
          3. no quantization with the triton attention backend.

        Raises:
            Exception: If the server is not ready after the last attempt.
        """
        phi = is_phi_model(self.model_name)
        # (quantize, use_triton) per attempt; the sequence matches the
        # original retry ladder, including the repeated last step for phi3.
        attempts = [(True, phi), (False, phi), (False, True)]
        final_attempt = len(attempts) - 1

        for attempt, (quantize, use_triton) in enumerate(attempts):
            self.server_process = self._launch_server(quantize=quantize, use_triton=use_triton)
            # Wait for the server to be ready
            try:
                wait_for_server(f"http://localhost:{self.port}", self.server_process, timeout=60*15)
            except Exception as e:
                bt.logging.error(f"Finetune: Server did not become ready within timeout period")
                if attempt == final_attempt:
                    self.cleanup()
                    raise Exception(f"Error running model {e}") from e
                # Stop the failed server before trying the next flag set.
                terminate_process(self.server_process)
                self.server_process.kill()
            else:
                break

        self.llm = ChatOpenAI(
            api_key="None",
            base_url=f"http://localhost:{self.port}/v1",
            model=self.model_name,
        )

    def cleanup(self):
        """Stop the server process (if any) and drop the model from the HF cache.

        Best-effort by design: it also runs from ``__del__``, so it never raises.
        """
        # Take the handle first and clear the attribute afterwards; the
        # previous order cleared it and then called kill() on None.
        process = getattr(self, "server_process", None)
        self.server_process = None
        if process is not None:
            try:
                terminate_process(process)
            except Exception:
                pass  # process may already be gone
            try:
                process.kill()
            except Exception:
                pass  # process may already be gone
        try:
            delete_model_from_hf_cache(self.model_name)
        except Exception:
            pass  # cache removal is optional housekeeping

    def __del__(self):
        self.cleanup()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.cleanup()
4374
if __name__ == "__main__":
    # Manual smoke test: start a server for one model, send a single prompt
    # and a small batch, and print whatever comes back.
    model_name = "MistralAI/Mistral-7B-Instruct-v0.1"
    server = ModelServer(model_name)

    # Leaving the ``with`` block calls server.cleanup() via __exit__.
    with server:
        try:
            # Single prompt
            single_answer = server.invoke("What is 2+2?")
            print("Basic invoke test:")
            print(f"Response: {single_answer}\n")

            # Batched prompts
            prompts = [f"What is {i}+{i}?" for i in range(3)]
            batch_answers = server.invoke_batch(prompts, batch_size=2)
            print("Batch invoke test:")
            for idx, answer in enumerate(batch_answers):
                print(f"Batch {idx} response: {answer}")

        except Exception as e:
            print(f"Error during testing: {e}")
4398
4399
4400---
4401File: /coding/finetune/pipeline.py
4402---
4403
4404import os
4405import pickle
4406import argparse
4407import traceback
4408import bittensor as bt
4409from typing import List
4410from pydantic import BaseModel
4411from .tracker import gather_all_logics
4412from concurrent.futures import ThreadPoolExecutor, as_completed
4413
4414from .dockerutil import run_docker_container_from_base
4415
4416from coding.schemas import Patch
4417from coding.schemas.context import Context
4418from coding.constants import COMPETITION_ID
4419from coding.rewards.codesim import CodeSimModel
4420from coding.schemas.tracking import TrackingInfo
4421from coding.constants import COMPETITION_ID, ALLOWED_MODULES, NUM_ALLOWED_CHARACTERS, ALLOWED_IMPORTS
4422
4423from coding.tasks.swe import SWEBenchTask
4424from coding.datasets.swe import SWEBenchDataset
4425from coding.finetune.llm.manager import LLMManager
4426from coding.helpers.codeanal import verify_code_usage
4427from coding.utils.config import config as util_config
4428from coding.utils.config import add_validator_args
4429
4430
4431
4432    
4433    
class FinetuneEventResults(BaseModel):
    """
    Result of one finetune evaluation event: the evaluated trackers plus the
    id of the competition they belong to.
    """

    # One entry per evaluated submission.
    trackers: List[TrackingInfo]
    # Defaults to the competition currently configured in coding.constants.
    competition_id: int = COMPETITION_ID

    def __state_dict__(self):
        """
        Return a plain-dict snapshot of the results.

        The snapshot reports this instance's own ``competition_id`` (it used
        to report the module constant, ignoring the field).
        """
        return {
            "trackers": [tracker.model_dump() for tracker in self.trackers],
            "competition_id": self.competition_id,
        }

    def public_state_dict(self):
        """
        Return the same snapshot as ``__state_dict__`` with every tracker's
        ``"model"`` entry set to ``None``.
        """
        state = self.__state_dict__()
        for tracker in state["trackers"]:
            tracker["model"] = None
        return state
4452
4453
4454
def generate_swe_tasks(ds: SWEBenchDataset, n: int = 1000, code_scorer =  None) -> List[SWEBenchTask]:
    """
    Build ``n`` SWE-bench tasks from samples drawn from ``ds``.

    A sample that cannot be turned into a task is logged and skipped, and
    another sample is drawn, so the loop only finishes once ``n`` tasks have
    been built.

    Args:
        ds: Dataset whose ``get()`` returns the keyword arguments of a ``Context``.
        n: Number of tasks to build.
        code_scorer: Scorer attached to every created task.

    Returns:
        The list of created tasks.
    """
    collected = []
    while True:
        if len(collected) >= n:
            return collected
        try:
            context = Context(**ds.get())
            collected.append(
                SWEBenchTask(llm=None, context=context, code_scorer=code_scorer)
            )
        except Exception as e:
            bt.logging.error(f"Error generating task: {e}")
            print(traceback.format_exc())
4464
4465
def bittensor_injector(self):
    """
    Attach bittensor handles to ``self``, all built from ``self.config``.

    Sets ``wallet``, ``dendrite`` (bound to that wallet), ``subtensor`` and
    the ``metagraph`` of ``config.netuid``, in that order.
    """
    cfg = self.config
    self.wallet = bt.wallet(config=cfg)
    self.dendrite = bt.dendrite(wallet=self.wallet)
    self.subtensor = bt.subtensor(config=cfg)
    self.metagraph = self.subtensor.metagraph(cfg.netuid)
4471
4472
def verify_logic(logic: dict) -> tuple[bool, str]:
    """
    Check a submitted logic bundle against the competition rules.

    Args:
        logic: Mapping of file path -> file contents.

    Returns:
        ``(True, "Logic is valid")`` when every check passes, otherwise
        ``(False, reason)`` for the first violated rule.

    Checks, in order:
        1. Every non-empty file has an allowed extension and passes
           ``verify_code_usage`` against the allowed modules/imports.
        2. The total number of characters (paths plus contents) does not
           exceed ``NUM_ALLOWED_CHARACTERS``.
    """
    allowed_extensions = {'.yaml', '.py', '.txt', '.json'}

    # The bundle's own top-level modules/packages may be imported by its files.
    allowed_modules = ALLOWED_MODULES.copy()
    for path in logic:
        # For folder paths only the first component counts.
        module_name = path.split("/")[0].split(".")[0]
        if module_name not in allowed_modules:
            allowed_modules.append(module_name)

    # Loop-invariant: "<module>." prefixes used to recognise submodule references.
    submodule_prefixes = tuple(f"{mod}." for mod in allowed_modules)

    for path, content in logic.items():
        if not content:
            continue

        file_extension = path.split('.')[-1]
        if f".{file_extension}" not in allowed_extensions:
            return False, f"File extension .{file_extension} is not allowed."

        # An allowed module implies all of its submodules: every whitespace
        # separated token of the form "<allowed module>.<...>" is allowed too.
        expanded_allowed = set(allowed_modules)
        expanded_allowed.update(
            token for token in content.split() if token.startswith(submodule_prefixes)
        )
        usage_pass, usage_msg = verify_code_usage(content, list(expanded_allowed), ALLOWED_IMPORTS)
        if not usage_pass:
            return False, usage_msg

    # Paths count towards the budget as well. Empty/None contents count as
    # zero characters (len(None) used to raise a TypeError here).
    total_chars = sum(len(path) + len(content or "") for path, content in logic.items())

    if total_chars > NUM_ALLOWED_CHARACTERS:
        return (
            False,
            f"Total characters: {total_chars} exceeds the limit of {NUM_ALLOWED_CHARACTERS}",
        )

    return True, "Logic is valid"
4518
class FinetunePipeline:
    """
    Evaluates miner-submitted logics against a fixed set of SWE-bench tasks.

    State (tasks, logics, results, completed trackers) is pickled under
    ``config.neuron.full_path`` in files suffixed with ``COMPETITION_ID``.

    NOTE: the state files are read with ``pickle``; they must only ever be
    files written by this process, never externally supplied data.
    """

    # Upper bound on tasks evaluated concurrently for a single logic.
    MAX_PARALLEL_TASKS = 8

    def __init__(
        self,
        config,
        tracking_logics: List[TrackingInfo] = None,
        code_sim_model: CodeSimModel = None,
    ):
        """
        Args:
            config: Validator config; ``config.neuron.full_path`` and
                ``config.neuron.finetune_test_size`` are read here.
            tracking_logics: Logics to evaluate. When ``None`` they are loaded
                from disk or gathered from the network.
            code_sim_model: Scorer to attach to tasks. When ``None`` a new
                ``CodeSimModel`` is created.
        """
        self.config = config
        try:
            bittensor_injector(self)
        except Exception as e:
            bt.logging.error(f"Error injecting bittensor: {e}")
            print(traceback.format_exc())
        self.code_sim_model = code_sim_model if code_sim_model is not None else CodeSimModel()
        self.trackers = []
        self.dataset = SWEBenchDataset()
        self.load_results()
        self.llm_manager = LLMManager()

        if tracking_logics is None:
            self.load_logics()
        else:
            self.tracking_logics = tracking_logics

        self.load_tasks()
        self.load_completed_trackers()

    # ------------------------------------------------------------------
    # Persistence helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _state_file(config, prefix: str) -> str:
        """Return the path of the ``<prefix>_<COMPETITION_ID>.pkl`` state file."""
        return f"{config.neuron.full_path}/{prefix}_{COMPETITION_ID}.pkl"

    @staticmethod
    def _dump_tasks(path: str, tasks) -> None:
        """
        Pickle ``tasks`` to ``path`` without their ``code_scorer``.

        The scorer is detached only for the duration of the dump and then
        re-attached, so the in-memory tasks remain usable for scoring.
        """
        scorers = [task.code_scorer for task in tasks]
        try:
            for task in tasks:
                task.code_scorer = None
            with open(path, "wb") as f:
                pickle.dump(tasks, f)
        finally:
            for task, scorer in zip(tasks, scorers):
                task.code_scorer = scorer

    def load_completed_trackers(self):
        """Load previously completed trackers, or start with an empty list."""
        path = self._state_file(self.config, "completed_trackers")
        if os.path.exists(path):
            with open(path, "rb") as f:
                self.completed_trackers = pickle.load(f)
        else:
            self.completed_trackers = []

    def store_completed_trackers(self):
        """Persist ``self.completed_trackers``."""
        with open(self._state_file(self.config, "completed_trackers"), "wb") as f:
            pickle.dump(self.completed_trackers, f)

    def load_tasks(self):
        """
        Load the stored tasks (truncated to ``finetune_test_size``) and attach
        the scorer, or generate and store a fresh task set when none exists.
        """
        path = self._state_file(self.config, "tasks")
        if os.path.exists(path):
            with open(path, "rb") as f:
                self.tasks = pickle.load(f)[:self.config.neuron.finetune_test_size]
            for task in self.tasks:
                task.code_scorer = self.code_sim_model
        else:
            self.tasks = generate_swe_tasks(
                self.dataset,
                self.config.neuron.finetune_test_size,
                code_scorer=self.code_sim_model,
            )
            self.store_tasks()

    def load_results(self):
        """Restore ``self.trackers`` from the results file when it exists."""
        results_file = self._state_file(self.config, "results")
        if os.path.exists(results_file):
            with open(results_file, "rb") as f:
                saved_results = pickle.load(f)
            self.trackers = saved_results.get("trackers", [])

    def store_logics(self):
        """Persist ``self.tracking_logics``."""
        with open(self._state_file(self.config, "logics"), "wb") as f:
            pickle.dump(self.tracking_logics, f)

    def load_logics(self):
        """Load stored logics, or gather them from the network and store them."""
        path = self._state_file(self.config, "logics")
        if os.path.exists(path):
            with open(path, "rb") as f:
                self.tracking_logics = pickle.load(f)
        else:
            self.tracking_logics = gather_all_logics(self)
            self.store_logics()

    def store_tasks(self):
        """Persist ``self.tasks`` (without scorers); in-memory tasks keep theirs."""
        self._dump_tasks(self._state_file(self.config, "tasks"), self.tasks)

    def store_results(self):
        """Write the results file via a temp file and an atomic replace."""
        results_file = self._state_file(self.config, "results")
        temp_file = results_file + ".tmp"

        # Write to a temp file first so a crash never leaves a truncated file.
        with open(temp_file, "wb") as f:
            pickle.dump({"trackers": self.trackers}, f)

        os.replace(temp_file, results_file)

    # ------------------------------------------------------------------
    # Evaluation
    # ------------------------------------------------------------------

    @property
    def results(self) -> FinetuneEventResults:
        """The trackers evaluated so far, wrapped as ``FinetuneEventResults``."""
        return FinetuneEventResults(
            trackers=self.trackers
        )

    def _run_task(self, tracking_logic, task_idx, task):
        """Run one task in a docker container and return its score (0 on any error)."""
        bt.logging.info(f"Processing task...")
        try:
            bt.logging.info(f"Making request to container for hotkey {tracking_logic.hotkey}, task index {task_idx}...")
            result = run_docker_container_from_base(
                f"swe-logic-{str(tracking_logic.hotkey)}-{COMPETITION_ID}-{task_idx}".lower(),
                task.repo,
                tracking_logic.hotkey,
                task.query,
                tracking_logic.logic
            )
            patch = Patch(**result)
            bt.logging.info(f"Scoring response for hotkey {tracking_logic.hotkey}, task index {task_idx}...")
            # TODO in the next comp uncomment the below
            # score = task.score(patch, self.llm_manager.get_count())
            score = task.score(patch, 1)
            self.llm_manager.reset_count()
            bt.logging.info(f"Score for hotkey {tracking_logic.hotkey}, task index {task_idx}: {score}")
            return score
        except Exception as e:
            bt.logging.error(f"Request failed for hotkey {tracking_logic.hotkey}, task index {task_idx}: {e}")
            print(traceback.format_exc())
            return 0

    def _score_logic(self, tracking_logic) -> list:
        """
        Run every task for one logic, at most ``MAX_PARALLEL_TASKS`` at a
        time, and return the per-task scores in completion order.
        """
        bt.logging.info(f"Initializing LLM key for hotkey {tracking_logic.hotkey}...")
        self.llm_manager.init_key(tracking_logic.hotkey)
        bt.logging.info(f"Starting docker container for hotkey {tracking_logic.hotkey}...")

        scores = []
        pending_tasks = iter(enumerate(self.tasks))
        bt.logging.info("Starting thread pool for task processing...")
        with ThreadPoolExecutor() as executor:
            bt.logging.info("Thread pool started.")
            active_futures = set()

            def submit_next() -> bool:
                """Submit the next pending task; return False when none remain."""
                task_data = next(pending_tasks, None)
                if task_data is None:
                    return False
                task_idx, task = task_data
                active_futures.add(
                    executor.submit(self._run_task, tracking_logic, task_idx, task)
                )
                return True

            bt.logging.info(f"Starting initial batch of {self.MAX_PARALLEL_TASKS} tasks...")
            while len(active_futures) < self.MAX_PARALLEL_TASKS and submit_next():
                pass
            bt.logging.info(f"Initial batch submitted, active futures: {len(active_futures)}")

            # Each time a task finishes, record its score and start the next one.
            completed = 0
            while active_futures:
                completed_future = next(as_completed(active_futures))
                active_futures.discard(completed_future)

                scores.append(completed_future.result())
                bt.logging.info(f"Average score for hotkey {tracking_logic.hotkey}: {sum(scores) / len(scores)}")

                submit_next()

                completed += 1
                bt.logging.info(f"Completed task {completed}/{len(self.tasks)} for hotkey {tracking_logic.hotkey}")
        return scores

    # TODO add time taken and handle race condition due to parallel execution
    # make use the same docker container for each task , where task repo files are copied over needs to change
    def evaluate(self) -> FinetuneEventResults:
        """
        Verify every logic, score the valid ones on all tasks and return the
        accumulated results.

        Logics that fail verification are emptied and scored 0. A logic whose
        content matches an already evaluated tracker reuses that score.
        Results are stored after each newly evaluated logic.
        """
        bt.logging.info("Gathering all logics...")
        bt.logging.info(f"Gathered {len(self.tracking_logics)} logics.")

        bt.logging.info("Verifying and building docker containers for each logic...")
        for tracker in self.tracking_logics:
            bt.logging.info(f"Verifying logic for hotkey {tracker.hotkey}...")
            pass_logic, pass_msg = verify_logic(tracker.logic)
            if not pass_logic:
                bt.logging.info(
                    f"Logic failed verification: {pass_msg} on tracker {tracker.hotkey}"
                )
                tracker.logic = {}
                continue
            bt.logging.info(f"Logic for hotkey {tracker.hotkey} passed verification.")

        bt.logging.info(f"Beginning evaluation of {len(self.tasks)} tasks...")
        for tracker_idx, tracking_logic in enumerate(self.tracking_logics):
            bt.logging.info(f"Processing tracker {tracker_idx + 1}/{len(self.tracking_logics)}")
            # Skip if no logic provided
            if not tracking_logic.logic:
                bt.logging.info(f"No logic provided for tracker {tracking_logic.hotkey}, skipping...")
                tracking_logic.score = 0
                self.trackers.append(tracking_logic)
                continue

            # Reuse the score of an identical, already evaluated logic.
            previous_tracker = next(
                (tracker for tracker in self.trackers if str(tracker.logic) == str(tracking_logic.logic)),
                None,
            )
            if previous_tracker is not None:
                bt.logging.info(f"Finetune: Using previously evaluated score for hotkey: {tracking_logic.hotkey}")
                tracking_logic.score = previous_tracker.score
                if tracking_logic.hotkey != previous_tracker.hotkey:
                    self.trackers.append(tracking_logic)
                continue

            # Otherwise, evaluate the logic
            scores = self._score_logic(tracking_logic)
            # With no tasks there is nothing to average; score 0 instead of
            # raising ZeroDivisionError.
            tracking_logic.score = sum(scores) / len(scores) if scores else 0.0
            self.trackers.append(tracking_logic)
            self.store_results()

            bt.logging.info(f"Cleaning up container for hotkey {tracking_logic.hotkey}...")
            bt.logging.info(f"Final score for hotkey {tracking_logic.hotkey}: {tracking_logic.score}")

        bt.logging.info("Evaluation complete!")
        self.store_results()

        return self.results

    # ------------------------------------------------------------------
    # Introspection
    # ------------------------------------------------------------------

    def _scores(self) -> list:
        """Scores of the trackers evaluated so far, in evaluation order."""
        return [tracker.score for tracker in self.trackers]

    def __str__(self):
        return f"{self.__class__.__name__}(scores={self._scores()!r}, models={self.tracking_logics!r})"

    def __repr__(self):
        return self.__str__()

    def __state_dict__(self):
        """Plain-dict snapshot: tracker scores and dumped tracking logics."""
        return {
            "scores": self._scores(),
            "tracking_logics": [model.model_dump() for model in self.tracking_logics],
        }

    # ------------------------------------------------------------------
    # Entry points and housekeeping
    # ------------------------------------------------------------------

    @staticmethod
    def start(
        config, code_sim_model: CodeSimModel = None
    ) -> FinetuneEventResults:
        """
        Build a pipeline, run the evaluation, clean up the state files and
        return the results.

        ``code_sim_model`` is passed by keyword; it used to be passed
        positionally and landed in ``tracking_logics``.
        """
        pipeline = FinetunePipeline(config, code_sim_model=code_sim_model)
        result = pipeline.evaluate()
        pipeline.cleanup()  # Ensure cleanup is called after evaluation
        return result

    @staticmethod
    def generate_tasks(config) -> List[SWEBenchTask]:
        """Generate a fresh task set, store it on disk and return it."""
        dataset = SWEBenchDataset()
        code_scorer = CodeSimModel()
        tasks = generate_swe_tasks(dataset, config.neuron.finetune_test_size, code_scorer=code_scorer)
        FinetunePipeline._dump_tasks(FinetunePipeline._state_file(config, "tasks"), tasks)
        return tasks

    @staticmethod
    def tasks_exist(config):
        """Return whether the task file of the current competition exists."""
        return os.path.exists(FinetunePipeline._state_file(config, "tasks"))

    def cleanup(self):
        """
        Delete every ``tasks_*.pkl`` and ``results_*.pkl`` file in the state
        directory. Files that are already gone are ignored.
        """
        state_dir = self.config.neuron.full_path
        for file in os.listdir(state_dir):
            if file.endswith(".pkl") and file.startswith(("tasks_", "results_")):
                try:
                    os.remove(os.path.join(state_dir, file))
                except FileNotFoundError:
                    pass  # removed concurrently; nothing left to do
4769
4770
4771
4772---
4773File: /coding/finetune/score.py
4774---
4775
4776import bittensor as bt
4777from typing import List, Any
4778from huggingface_hub import model_info
4779from concurrent.futures import ProcessPoolExecutor
4780
4781from coding.tasks.task import Task
4782from coding.finetune.evaluate import evaluate
4783from coding.finetune.model import ModelServer
4784from coding.rewards.codesim import CodeSimModel
4785
4786
def cleanup_code_sim_model(self):
    """
    Best-effort release of the code similarity model held by ``self``.

    For the scorer's ``_model`` and ``_tokenizer`` the component is moved to
    the CPU (when it supports ``.cpu()``), passed to ``release_memory`` and
    its attribute removed. Finally ``self.code_sim_model`` is deleted and the
    CUDA cache is emptied.

    Each step is independent: a failure is logged and the remaining steps
    still run. Nothing is raised, and the call is a no-op when ``self`` has
    no ``code_sim_model``.
    """
    code_sim_model = getattr(self, "code_sim_model", None)
    if code_sim_model is None:
        return

    try:
        import torch
        from accelerate.utils import release_memory
    except ImportError:
        torch = release_memory = None

    scorer = getattr(code_sim_model, "code_scorer", None)
    for attr in ("_model", "_tokenizer"):
        component = getattr(scorer, attr, None)
        if component is None:
            continue
        try:
            if torch is not None:
                with torch.no_grad():
                    # Not every component has .cpu(); calling it blindly used
                    # to abort the rest of the cleanup.
                    if hasattr(component, "cpu"):
                        component.cpu()
                    release_memory(component)
        except Exception as e:
            bt.logging.debug(f"Error releasing code_scorer.{attr}: {e}")
        finally:
            try:
                delattr(scorer, attr)
            except AttributeError:
                pass  # attribute is not set on the instance itself

    try:
        del self.code_sim_model
    except AttributeError:
        pass

    if torch is not None:
        try:
            # Run last so the memory released above is actually returned.
            torch.cuda.empty_cache()
        except Exception as e:
            bt.logging.debug(f"Error emptying CUDA cache: {e}")
4806
def validate_model_info(model_name: str) -> bool:
    """
    Check a Hugging Face model's license and size.

    Returns ``True`` only when the model card's license is one of the accepted
    licenses and the safetensors total is below 10,000,000,000. Any error
    while fetching or reading the model info is logged and yields ``False``.
    """
    accepted_licenses = ("apache-2.0", "cc-by-nc-4.0", "mit")
    size_limit = 10_000_000_000
    try:
        info = model_info(model_name)
        model_license = info.card_data['license']
        size = info.safetensors.total
        return model_license in accepted_licenses and size < size_limit
    except Exception as e:
        bt.logging.info(f"Error validating model {model_name}: {e}")
        return False
4816
def score(self, model_name: str, tasks: List[Task]) -> float:
    """
    Calculate the average score across multiple tasks for a given model.

    Args:
        model_name (str): Name or path of the model to evaluate
        tasks (List[Task]): Tasks to evaluate the model on; each task's
            ``query`` is sent to the model and its ``reference`` is compared
            with the response.

    Returns:
        float: Mean similarity between references and responses, or 0.0 when
        the model is invalid, fails to load, evaluation fails, or there are no
        scores.

    The function:
    1. Validates the model info
    2. Starts a model server for the model
    3. Sends all task queries to the server in one batch
    4. Shuts the model server down
    5. Scores the responses against the references with ``CodeSimModel``
    6. Releases the similarity model and returns the mean score
    """
    if not validate_model_info(model_name):
        bt.logging.info(f"Model {model_name} is not valid. It must have a valid license and be less than 10B parameters.")
        return 0.0

    try:
        model_server = ModelServer(model_name)
    except Exception as e:
        # Nothing to clean up here: no server object exists when construction fails.
        bt.logging.info(f"Error loading model {model_name}: {e}") # TODO change to logging
        return 0.0

    try:
        try:
            responses = model_server.invoke_batch([task.query for task in tasks])
        finally:
            # Shut the server down whether or not inference succeeded, and
            # before the similarity model is created.
            model_server.cleanup()
            model_server = None

        self.code_sim_model = CodeSimModel()
        references = [task.reference for task in tasks]
        scores = self.code_sim_model.similarity_batch(references, responses)
        if not scores:
            return 0.0
        return sum(scores) / len(scores)
    except Exception as e:
        bt.logging.info(f"Error evaluating model: {e}")
        return 0.0
    finally:
        cleanup_code_sim_model(self)
4881
4882
4883
4884
4885
4886---
4887File: /coding/finetune/tracker.py
4888---
4889
4890from typing import List
4891
4892from coding.protocol import LogicSynapse
4893from coding.schemas.tracking import TrackingInfo
4894from coding.utils.uids import get_miner_uids, get_hotkey_from_uid
4895
def gather_all_logics(validator) -> List[TrackingInfo]:
    """
    Query every miner for its logic and wrap each answer in a ``TrackingInfo``.

    Miners are queried one at a time with a 45 second timeout. When a query
    raises, the error is printed and the unanswered request synapse stands in
    for the response, so the result always has one entry per miner uid.
    Every entry starts with a score of 0.0.
    """
    uids = get_miner_uids(validator)
    axons = [validator.metagraph.axons[uid] for uid in uids]
    request = LogicSynapse()

    replies = []
    for axon in axons:
        try:
            reply = validator.dendrite.query(
                axons=[axon], synapse=request, timeout=45, deserialize=False
            )[0]
        except Exception as e:
            print("Error querying axon", axon, e)
            reply = request
        replies.append(reply)

    tracking = []
    for uid, reply in zip(uids, replies):
        tracking.append(
            TrackingInfo(
                logic=reply.logic,
                block=validator.metagraph.block,
                hotkey=get_hotkey_from_uid(validator, uid),
                uid=uid,
                score=0.0,
            )
        )
    return tracking
4917
4918
4919
4920---
4921File: /coding/helpers/__init__.py
4922---
4923
4924from .selector import Selector
4925from .parser import *
4926from .cosine import *
4927from .forwards import *
4928from .fim import *
4929
4930
4931---
4932File: /coding/helpers/codeanal.py
4933---
4934
4935import ast
4936from typing import List, Dict
4937
def verify_code_usage(code: str, allowed_modules: List[str], allowed_imports: Dict[str, List[str]]) -> tuple[bool, str]:
    """
    Statically check that ``code`` only uses permitted modules and functions.

    Args:
        code: Python source to inspect.
        allowed_modules: Modules that may be imported without restriction.
        allowed_imports: Restricted modules mapped to the only names that may
            be imported from them or called on them. A module mapped to an
            empty list may not be imported with a plain ``import``.

    Returns:
        ``(True, "Code is safe")`` when no rule is violated, otherwise
        ``(False, reason)``. Source that cannot be parsed is rejected.

    The check runs in two passes so the result does not depend on where in
    the file an import appears relative to the calls that use it:
        1. Validate all ``import`` / ``from ... import`` statements and record
           which names refer to which module (including ``import x as y``).
        2. Reject ``<module>.<attr>(...)`` calls on restricted modules when
           ``attr`` is not allowed, and direct calls to ``eval`` / ``exec``.

    NOTE(review): only direct ``name.attr(...)`` calls are inspected.
    Indirect access (``getattr``, ``__import__``, assigning a function to
    a variable first) is not covered by this check.
    """
    try:
        nodes = list(ast.walk(ast.parse(code)))

        # Pass 1: imports. Maps a name usable in the code to its module.
        bound_modules = {}
        for node in nodes:
            if isinstance(node, ast.Import):
                for alias in node.names:
                    # A restricted module with no allowed names cannot be imported wholesale.
                    if alias.name in allowed_imports and not allowed_imports[alias.name]:
                        return False, f"Disallowed unrestricted use of module: {alias.name}"
                    if alias.name not in allowed_modules and alias.name not in allowed_imports:
                        return False, f"Disallowed module: {alias.name}"
                    bound_modules[alias.name] = alias.name
                    if alias.asname:
                        # ``import os as o`` must not hide calls on ``o`` from pass 2.
                        bound_modules[alias.asname] = alias.name
            elif isinstance(node, ast.ImportFrom):
                if node.module not in allowed_modules and node.module not in allowed_imports:
                    return False, f"Disallowed module: {node.module}"
                if node.module in allowed_imports:
                    for alias in node.names:
                        if alias.name not in allowed_imports[node.module]:
                            return False, f"Disallowed import {alias.name} from module {node.module}"
                bound_modules[node.module] = node.module

        # Pass 2: calls.
        for node in nodes:
            if not isinstance(node, ast.Call):
                continue
            func = node.func
            if isinstance(func, ast.Attribute):
                if isinstance(func.value, ast.Name):
                    module = bound_modules.get(func.value.id)
                    if module in allowed_imports and func.attr not in allowed_imports[module]:
                        return False, f"Disallowed function {module}.{func.attr}"
            elif isinstance(func, ast.Name):
                if func.id in ("eval", "exec"):
                    return False, f"Dangerous built-in function call: {func.id}"
        return True, "Code is safe"
    except Exception as e:
        # Fail closed: anything that cannot be analysed is rejected.
        return False, f"Error during parsing: {e}"
4982
4983
4984---
4985File: /coding/helpers/cosine.py
4986---
4987
4988import numpy as np
4989from sklearn.metrics.pairwise import cosine_similarity
4990
def cosim(model, text1: str, text2: str) -> float:
    """
    Return the cosine similarity between the embeddings of two texts.

    ``model.encode`` is called once with both texts; the similarity of the
    first embedding to the second is returned.
    """
    vectors = model.encode([text1, text2])
    pairwise = cosine_similarity([vectors[0]], [vectors[1]])
    return pairwise[0][0]
5001
def normalize_cosim(value, min_value=0.5, max_value=1.0, exponent=1.3):
    """
    Exponentially normalize the cosine similarity value to a range of 0 to 1.

    Parameters:
    value (float): The cosine similarity value to be normalized.
    min_value (float): The minimum value of the original range. Default is 0.5.
    max_value (float): The maximum value of the original range. Default is 1.0.
    exponent (float): The exponent to be used for the normalization. Default is 1.3.

    Returns:
    float: The exponentially normalized value in the range of 0 to 1, or 0 if
    the value is NaN or lies outside [min_value, max_value]. A value that
    exceeds max_value only by floating-point rounding is treated as max_value.

    Raises:
    ValueError: If min_value equals max_value.
    """
    if min_value == max_value:
        raise ValueError("min_value and max_value must be different")

    # First normalize linearly
    linear_normalized_value = (value - min_value) / (max_value - min_value)

    if np.isnan(linear_normalized_value):
        return 0

    # Similarities of (near-)identical vectors can come out a hair above the
    # maximum (e.g. 1.0000001); that is a perfect score, not an invalid one.
    if linear_normalized_value > 1 and np.isclose(linear_normalized_value, 1.0, rtol=0.0, atol=1e-6):
        linear_normalized_value = 1.0

    # Anything genuinely out of bounds is invalid
    if linear_normalized_value < 0 or linear_normalized_value > 1:
        return 0

    # Then apply the exponential transformation
    return np.power(linear_normalized_value, exponent)
5029
5030
5031---
5032File: /coding/helpers/fim.py
5033---
5034
5035import random
5036from typing import Tuple
5037
def insert_fim_hole(code: str) -> Tuple[str, str]:
    """
    Replace a random run of 1 to 15 consecutive lines with ``<|fim_hole|>``.

    Args:
        code: Source text to punch a hole into.

    Returns:
        ``(new_code, removed)``: the text with the selected lines replaced by
        a single ``<|fim_hole|>`` line, and the removed lines joined with
        newlines. Text with fewer than two lines is returned unchanged
        together with an empty string (previously an empty list, which did
        not match the declared return type).
    """
    lines = code.splitlines()
    if len(lines) < 2:
        return code, ""

    # The hole spans between 1 and 15 lines, never more than the text has.
    max_hole_size = min(15, len(lines))

    start_index = random.randint(0, len(lines) - 1)
    hole_size = random.randint(1, max_hole_size)

    # Exclusive end, clipped so the hole never runs past the last line.
    end_index = min(start_index + hole_size, len(lines))

    replaced_lines = lines[start_index:end_index]
    lines[start_index:end_index] = ["<|fim_hole|>"]

    return "\n".join(lines), "\n".join(replaced_lines)
5063
5064
5065---
5066File: /coding/helpers/forwards.py
5067---
5068
5069import json
5070import time
5071import traceback
5072import bittensor as bt
5073from starlette.types import Send
5074from typing import List, Any, Dict
5075from langchain_core.runnables.base import RunnableSequence
5076
5077
async def string_forward(string, send: Send):
    """Send ``string`` as one final ``http.response.body`` message (``more_body`` is False)."""
    message = {
        "type": "http.response.body",
        "body": string,
        "more_body": False,
    }
    await send(message)
5086
async def chain_forward(
        self,
        query: str,
        files: List[Any],
        extra_info: Dict[str, Any],
        init_time: float,
        timeout_threshold: float,
        chain: RunnableSequence,
        chain_formatter: Dict[str, str],
        send: Send,
    ):
    """
    Stream the output of ``chain`` to ``send`` in batches of tokens.

    Tokens from ``chain.stream(chain_formatter)`` are collected; whenever
    ``self.config.neuron.streaming_batch_size`` tokens are pending they are
    sent as one ``http.response.body`` message with ``more_body`` True.
    When ``extra_info`` contains ``"broken_file"`` nothing is sent while
    streaming and everything goes out in the final message. Tokens still
    pending at the end are sent with ``more_body`` False.

    Streaming stops once ``timeout_threshold`` seconds have passed since
    ``init_time``; pending tokens are then dropped. Any exception is logged,
    and ``self.should_exit`` is set when
    ``self.config.neuron.stop_on_forward_exception`` is true.

    ``query`` and ``files`` are not used by this function.
    """
    pending = []
    streamed_text = ""  # for wandb logging
    timed_out = False
    try:
        # Langchain built in streaming. 'astream' also available for async
        for chunk in chain.stream(chain_formatter):
            piece = chunk if isinstance(chunk, str) else chunk.content
            pending.append(piece)

            elapsed = time.time() - init_time
            if elapsed > timeout_threshold:
                bt.logging.debug(f"⏰ Timeout reached, stopping streaming")
                timed_out = True
                break

            # Only flush mid-stream when no broken file is involved and a
            # full batch has accumulated.
            if "broken_file" in extra_info.keys():
                continue
            if len(pending) != self.config.neuron.streaming_batch_size:
                continue

            batch_text = "".join(pending)
            streamed_text += batch_text
            bt.logging.debug(f"Streamed tokens: {repr(batch_text)}")

            await send(
                {
                    "type": "http.response.body",
                    "body": batch_text,
                    "more_body": True,
                }
            )
            pending = []

        # Don't send the last buffer of data if timeout.
        if pending and not timed_out:
            await send(
                {
                    "type": "http.response.body",
                    "body": "".join(pending),
                    "more_body": False,
                }
            )
    except Exception as e:
        bt.logging.error(f"Error in forward: {e}, - {traceback.format_exc()}")
        if self.config.neuron.stop_on_forward_exception:
            self.should_exit = True
5145
5146
5147---
5148File: /coding/helpers/git.py
5149---
5150
5151import os
5152import shutil
5153import tempfile
5154import weakref
5155from git import Repo
5156
class GitRepo:
    """
    Temporary on-disk clone of a git repository checked out at one commit.

    The clone lives in a directory created with ``tempfile.mkdtemp``. The
    directory is removed when the object is used as a context manager and the
    ``with`` block exits, or when the object is garbage collected.
    """

    def __init__(self, repo_name: str, commit_hash: str):
        """
        Initialize a Git repository object that manages cloning and cleanup.

        Args:
            repo_name (str): Name/URL of the repository to clone
            commit_hash (str): Specific commit hash to checkout

        Raises:
            git.exc.GitCommandError: If repository does not exist or other git error occurs
        """
        self.repo_name = repo_name
        self.commit_hash = commit_hash
        self.temp_dir = tempfile.mkdtemp()
        self.repo = None
        self._initialize_repo()

    def _initialize_repo(self):
        """Initialize/reinitialize the git repository"""
        if self.temp_dir and os.path.exists(self.temp_dir) and os.listdir(self.temp_dir):
            # The directory already has content (e.g. after unpickling on the
            # same machine): reuse it instead of cloning again.
            # NOTE(review): self.repo is left untouched here, so it is None
            # after unpickling — confirm no caller relies on it.
            self._register_finalizer()
            return
        # Ensure repo name includes full GitHub URL if not already
        if not self.repo_name.startswith(('http://', 'https://', 'git://')):
            self.repo_name = f"https://github.com/{self.repo_name}"

        try:
            # Clone repo with minimal history and specific commit
            self.repo = Repo.clone_from(
                self.repo_name,
                self.temp_dir,
                depth=1,  # Only get most recent commit
                no_single_branch=True,  # Allow fetching specific commit
                no_tags=True  # Don't fetch any tags
            )
            # Fetch only the specific commit
            self.repo.git.fetch('origin', self.commit_hash, depth=1)
            self.repo.git.checkout(self.commit_hash)
        except Exception:
            # Do not leave a half-populated temp directory behind on failure.
            self._remove_dir(self.temp_dir)
            raise
        # Register cleanup to be called when object is deleted
        self._register_finalizer()

    def _register_finalizer(self):
        """Arrange for the temp directory to be removed when this object dies."""
        previous = getattr(self, "_finalizer", None)
        if previous is not None:
            # Avoid stacking several finalizers when re-initialized.
            previous.detach()
        # The callback must not reference `self` (a bound method would), or
        # the object could never be garbage collected and cleanup would only
        # happen at interpreter exit.
        self._finalizer = weakref.finalize(
            self, type(self)._remove_dir, self.temp_dir
        )

    @staticmethod
    def _remove_dir(path):
        """Delete ``path`` recursively if it exists; errors are printed, not raised."""
        try:
            if path and os.path.exists(path):
                shutil.rmtree(path)
        except Exception as e:
            print(f"Error during cleanup: {str(e)}")

    def __getstate__(self):
        """Called when pickling - return state without repo objects"""
        state = self.__dict__.copy()
        # Remove unpicklable objects
        state['repo'] = None
        state['_finalizer'] = None
        return state

    def __setstate__(self, state):
        """Called when unpickling - restore state and reinitialize repo"""
        self.__dict__.update(state)
        if self.temp_dir is None:
            self.temp_dir = tempfile.mkdtemp()
        self._initialize_repo()

    def _cleanup(self):
        """
        Clean up the temporary directory containing the cloned repository.
        """
        self._remove_dir(self.temp_dir)

    @property
    def path(self) -> str:
        """
        Get the path to the cloned repository.

        Returns:
            str: Path to the repository directory
        """
        return self.temp_dir

    @property
    def files(self) -> dict[str, str]:
        """
        Read every file under ``path`` into memory.

        Returns:
            dict[str, str]: File contents (decoded as latin-1) keyed by the
            path relative to ``path``. ``__pycache__`` directories are skipped.
        """
        logic = {}
        for root, dirs, files in os.walk(self.path):
            # Removing the entry in place stops os.walk from descending into it.
            if '__pycache__' in dirs:
                dirs.remove('__pycache__')

            rel_path = os.path.relpath(root, self.path)

            for filename in files:
                # Skip __pycache__ files
                if '__pycache__' in filename:
                    continue

                file_path = os.path.join(root, filename)
                # Files at the top level are keyed by bare name, not "./name".
                if rel_path == '.':
                    logic_key = filename
                else:
                    logic_key = os.path.join(rel_path, filename)

                # latin-1 can decode any byte sequence, so this never raises
                # on binary content.
                with open(file_path, 'r', encoding='latin-1') as f:
                    logic[logic_key] = f.read()
        return logic

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self._cleanup()
5267
5268
5269---
5270File: /coding/helpers/parser.py
5271---
5272
5273import re
5274
def extract_python_code(markdown_string):
    """
    Collect the contents of every ```python fenced block in a Markdown string.

    Parameters:
        markdown_string (str): The Markdown text to search.

    Returns:
        list of str: Code found between each "```python" line and the next
        closing "```" line, in order of appearance, without the fences.
    """
    # DOTALL lets "." span newlines so multi-line blocks match; the lazy
    # quantifier stops at the first closing fence.
    return re.findall(r'```python\n(.*?)\n```', markdown_string, flags=re.DOTALL)
5292
5293
5294---
5295File: /coding/helpers/rewrite.py
5296---
5297
5298import random
5299
# Adjectives inserted into the rewrite prompt; one is picked uniformly at
# random per call, so each entry appears only once.
REWRITE_REASONS = [
    "more concise",
    "more verbose",
    "more pythonic",
    "more efficient",
    "more readable",
    "more correct",
    "a little different",
    "super concise",
    "super verbose",
    "super pythonic",
    "super efficient",
    "super readable",
    "super correct",
]


def rewrite_code(code: str, model) -> str:
    """
    Ask ``model`` to rewrite ``code`` in a randomly chosen style.

    Args:
        code (str): Source code to rewrite.
        model: Object with an ``invoke(prompt)`` method whose result has a
            ``.content`` string (not a plain string, despite older hints).

    Returns:
        str: The model's reply. When the reply contains a ``` fence, only the
        text between the first opening fence line and the last closing fence
        is returned, stripped. A reply without any fence is returned as-is.
    """
    res = model.invoke(f"Rewrite the following code to be {random.choice(REWRITE_REASONS)}, make sure it does the same thing though: {code}").content

    fence = "```"
    open_at = res.find(fence)
    if open_at == -1:
        return res

    # Skip the rest of the opening fence line (e.g. the "python" tag). If the
    # fence is not followed by a newline, start right after the backticks.
    after_fence = open_at + len(fence)
    line_end = res.find("\n", after_fence)
    body_start = line_end + 1 if line_end != -1 else after_fence

    close_at = res.rfind(fence)
    if close_at < body_start:
        # Only one fence present (unclosed block): keep everything after it
        # instead of returning an empty string.
        close_at = len(res)
    return res[body_start:close_at].strip()
5327
5328
5329
5330---
5331File: /coding/helpers/selector.py
5332---
5333
5334# The MIT License (MIT)
5335# Copyright © 2024 Yuma Rao
5336# Copyright © 2023 Opentensor Foundation
5337# Copyright © 2024 Macrocosmos
5338
5339# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
5340# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
5341# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
5342# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
5343
5344# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
5345# the Software.
5346
5347# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
5348# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
5349# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
5350# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
5351# DEALINGS IN THE SOFTWARE.
5352
5353import random
5354
5355
class Selector:
    """Picks one item at random using its own seeded random generator."""

    def __init__(self, seed=None):
        # A private generator keeps selections reproducible per instance and
        # independent of the global `random` state.
        self.rng = random.Random(seed)
        self.seed = seed

    def __call__(self, items, weights=None):
        """Return one element of ``items``, optionally biased by ``weights``."""
        (picked,) = self.rng.choices(items, weights=weights, k=1)
        return picked
5363
5364
class PageRankSelector(Selector):
    """Random choice biased towards the front of the list, on the assumption that earlier items matter more."""

    def __init__(self, seed=None, alpha=0.85):
        super().__init__(seed)
        self.alpha = alpha

    def __call__(self, items):
        """Return one item; the item at position ``i`` has weight ``alpha ** i``."""
        decay = []
        for position in range(len(items)):
            decay.append(self.alpha**position)
        (picked,) = self.rng.choices(items, weights=decay, k=1)
        return picked
5375
5376
class SimilaritySelector(Selector):
    """Returns the item that scores highest against a query."""

    def __init__(self, seed=None, similarity_fn=None):
        super().__init__(seed)
        self.similarity_fn = similarity_fn

    def __call__(self, query, items):
        """Return the element of ``items`` maximizing ``similarity_fn(query, item)``."""

        def score(candidate):
            return self.similarity_fn(query, candidate)

        return max(items, key=score)
5386
5387
class TopSelector(Selector):
    """Always returns the first item."""

    def __init__(self, seed=None):
        # Kept for interface parity with the other selectors; the seed is not
        # used when selecting.
        super().__init__(seed=seed)

    def __call__(self, items):
        """Return ``items[0]``."""
        first = items[0]
        return first
5396
5397
if __name__ == "__main__":
    # Smoke check: a seeded Selector must return a member of its input.
    items = range(10)
    item = Selector(seed=42)(items)

    assert item in items, "Selector should return one of the items"
5404
5405
5406
5407---
5408File: /coding/helpers/swebench.py
5409---
5410
5411from __future__ import annotations
5412
5413import re
5414import requests
5415import time
5416
5417from bs4 import BeautifulSoup
5418from ghapi.core import GhApi
5419from fastcore.net import HTTP404NotFoundError, HTTP403ForbiddenError
5420from typing import Callable, Iterator, Optional
5421from unidiff import PatchSet
5422
5423
class Repo:
    """Thin wrapper around a ``GhApi`` client bound to one GitHub repository."""

    # Matches "<word> #<number>" pairs such as "fixes #123".
    _ISSUES_PAT = re.compile(r"(\w+)\s+\#(\d+)")
    # Matches HTML comments, including multi-line ones.
    _COMMENTS_PAT = re.compile(r"(?s)<!--.*?-->")
    # Words that mark the following "#<number>" as resolved by the PR.
    _KEYWORDS = frozenset(
        {
            "close",
            "closes",
            "closed",
            "fix",
            "fixes",
            "fixed",
            "resolve",
            "resolves",
            "resolved",
            "complete",
            "completed",
            "finish",
            "finishes",
            "finished",
        }
    )

    def __init__(self, owner: str, name: str, token: Optional[str] = None):
        """
        Init to retrieve target repository and create ghapi tool

        Args:
            owner (str): owner of target repository
            name (str): name of target repository
            token (str): github token
        """
        self.owner = owner
        self.name = name
        self.token = token
        self.api = GhApi(token=token)
        self.repo = self.call_api(self.api.repos.get, owner=owner, repo=name)
        # call_api returns None when the repository is not found.
        self.size = self.repo.size if self.repo else 0

    def _poll_rate_limit(self) -> None:
        """Query the rate limit up to 10 times, stopping once quota remains.

        There is no delay between queries.
        """
        for _ in range(10):
            rl = self.api.rate_limit.get()
            if rl.resources.core.remaining > 0:
                break

    def call_api(self, func: Callable, **kwargs) -> dict|None:
        """
        API call wrapper with basic rate limit handling.

        The call is attempted up to 10 times. After a 403 response the rate
        limit is polled (without sleeping) before the next attempt.

        Args:
            func (callable): API function to call
            **kwargs: keyword arguments to pass to API function
        Return:
            values (dict): response object of `func`, or None when the API
                answers 404 or every attempt ends in a 403
        """
        for _ in range(10):
            try:
                return func(**kwargs)
            except HTTP403ForbiddenError:
                self._poll_rate_limit()
            except HTTP404NotFoundError:
                return None
        # All attempts were rejected with 403.
        return None

    def extract_resolved_issues(self, pull: dict) -> list[str]:
        """
        Extract list of issues referenced by a PR

        The PR title, body and commit messages are searched (HTML comments
        removed) for "<keyword> #<number>" pairs.

        Args:
            pull (dict): PR dictionary object from GitHub
        Return:
            resolved_issues (list): issue numbers (as strings) referenced by
                the PR, in order of first appearance, without duplicates
        """
        # Construct text to search over for issue numbers from PR body and commit messages
        text = pull.title if pull.title else ""
        text += "\n" + (pull.body if pull.body else "")
        commits = self.get_all_loop(
            self.api.pulls.list_commits, pull_number=pull.number, quiet=True
        )
        commit_messages = [commit.commit.message for commit in commits]
        text += "\n" + "\n".join(commit_messages)
        # Remove comments from text
        text = self._COMMENTS_PAT.sub("", text)

        # Walk every <keyword, number> match. Building a dict from the matches
        # would keep only the last number per keyword and silently drop
        # earlier ones (e.g. "fixes #1 ... fixes #2").
        resolved_issues = []
        seen = set()
        for word, issue_num in self._ISSUES_PAT.findall(text):
            if word.lower() in self._KEYWORDS and issue_num not in seen:
                seen.add(issue_num)
                resolved_issues.append(issue_num)
        return resolved_issues

    def get_all_loop(
        self,
        func: Callable,
        per_page: int = 100,
        num_pages: Optional[int] = None,
        quiet: bool = False,
        start_page = 1,
        **kwargs,
    ) -> Iterator:
        """
        Yield values from a paginated API endpoint.

        At most 10 requests are made per call, so at most 10 pages are read.
        Any exception raised while fetching a page triggers a rate-limit poll
        and the same page is requested again on the next iteration.

        Args:
            func (callable): API function to call
            per_page (int): number of values to return per page
            num_pages (int): last page number to fetch (inclusive); None for
                no limit other than the 10-request cap
            quiet (bool): when False, the rate limit is queried after each page
            start_page (int): first page number to request
            **kwargs: keyword arguments to pass to API function
        """
        page = start_page
        args = {
            "owner": self.owner,
            "repo": self.name,
            "per_page": per_page,
            **kwargs,
        }
        for _ in range(10):
            try:
                # Get values from API call
                values = func(**args, page=page)
                yield from values
                if len(values) == 0:
                    # An empty page means the listing is exhausted.
                    break
                if not quiet:
                    # Result is not used; the query itself is kept as-is.
                    self.api.rate_limit.get()
                if num_pages is not None and page >= num_pages:
                    break
                page += 1
            except Exception:
                # Rate limit handling
                self._poll_rate_limit()

    def get_all_issues(
        self,
        per_page: int = 100,
        num_pages: Optional[int] = None,
        direction: str = "desc",
        sort: str = "created",
        state: str = "closed",
        quiet: bool = False,
    ) -> Iterator:
        """
        Wrapper for API call to get all issues from repo

        Args:
            per_page (int): number of issues to return per page
            num_pages (int): number of pages to return
            direction (str): direction to sort issues
            sort (str): field to sort issues by
            state (str): state of issues to look for
            quiet (bool): forwarded to `get_all_loop`
        """
        return self.get_all_loop(
            self.api.issues.list_for_repo,
            num_pages=num_pages,
            per_page=per_page,
            direction=direction,
            sort=sort,
            state=state,
            quiet=quiet,
        )

    def get_all_pulls(
        self,
        per_page: int = 100,
        num_pages: Optional[int] = None,
        direction: str = "desc",
        sort: str = "created",
        state: str = "closed",
        quiet: bool = False,
        start_page = 1,
    ) -> Iterator:
        """
        Wrapper for API call to get all PRs from repo

        Args:
            per_page (int): number of PRs to return per page
            num_pages (int): number of pages to return
            direction (str): direction to sort PRs
            sort (str): field to sort PRs by
            state (str): state of PRs to look for
            quiet (bool): forwarded to `get_all_loop`
            start_page (int): first page number to request
        """
        return self.get_all_loop(
            self.api.pulls.list,
            num_pages=num_pages,
            direction=direction,
            per_page=per_page,
            sort=sort,
            state=state,
            quiet=quiet,
            start_page=start_page,
        )
5620
5621
def extract_problem_statement_and_hints(pull: dict, repo: Repo) -> tuple[str, str]:
    """
    Build the problem statement and hints for a pull request from its issues.

    Repositories named "django" are delegated to
    `extract_problem_statement_and_hints_django`.

    Args:
        pull (dict): PR dictionary object from GitHub; its "resolved_issues"
            entry lists the issue numbers to look up
        repo (Repo): Repo object
    Return:
        text (str): "<title>\\n<body>\\n" of every issue found, concatenated
        hints (str): hint texts of all issues joined by newlines
    """
    if repo.name == "django":
        return extract_problem_statement_and_hints_django(pull, repo)

    statement_parts = []
    hints_per_issue = []
    for referenced_number in pull["resolved_issues"]:
        issue = repo.call_api(
            repo.api.issues.get,
            owner=repo.owner,
            repo=repo.name,
            issue_number=referenced_number,
        )
        # call_api yields None when the issue cannot be fetched; skip it.
        if issue is None:
            continue
        title = issue.title or ""
        body = issue.body or ""
        statement_parts.append(f"{title}\n{body}\n")
        # Hints are looked up with the number reported by the API object.
        issue_hints = _extract_hints(pull, repo, issue.number)
        hints_per_issue.append("\n".join(issue_hints))

    text = "".join(statement_parts)
    hints = "\n".join(hints_per_issue) if hints_per_issue else ""
    return text, hints
5654
5655
5656def _extract_hints(pull: dict, repo: Repo, issue_number: int) -> list[str]:
5657    """
5658    Extract hints from comments associated with a pull request (before first commit)
5659
5660    Args:
5661        pull (dict): PR dictionary object from GitHub
5662        repo (Repo): Repo object
5663        issue_number (int): issue number
5664    Return:
5665        hints (list): list of hints
5666    """
5667    # Get all commits in PR
5668    commits = repo.get_all_loop(
5669        repo.api.pulls.list_commits, pull_number=pull["number"], quiet=True
5670    )
5671    commits = list(commits)
5672    if len(commits) == 0:
5673        # If there are no comments, return no hints
5674        return []
5675    # Get time of first commit in PR
5676    commit_time = commits[0].commit.author.date  # str
5677    commit_time = time.mktime(time.strptime(commit_time, "%Y-%m-%dT%H:%M:%SZ"))
5678    # Get all comments in PR
5679    all_comments = repo.get_all_loop(
5680        repo.api.issues.list_comments, issue_number=issue_number, quiet=True
5681    )
5682    all_comments = list(all_comments)
5683    # Iterate through all comments, only keep comments created before first commit
5684    comments = list()
5685    for comment in all_comments:
5686        comment_time = time.mktime(
5687            time.strptime(comment.updated_at, "%Y-%m-%dT%H:%M:%SZ")
5688        )  # use updated_at instead of created_at
5689        if comment_time < commit_time:
5690            comments.append(comment)
5691        else:
5692            break
5693        # only include information available before the first commit was created
5694    # Keep text from comments
5695    comments = [comment.body for comment in comments]
5696    return comments
5697
5698
def extract_patches(pull: dict, repo: Repo) -> tuple[str, str]:
    """
    Download a PR's diff and split it into fix and test patches.

    A file goes into the test patch when its path contains any of
    "test", "tests", "e2e" or "testing"; every other file goes into the fix
    patch.

    Args:
        pull (dict): PR dictionary object from GitHub; "diff_url" is fetched
        repo (Repo): Repo object (not used here)
    Return:
        patch_change_str (str): gold patch
        patch_test_str (str): test patch
    """
    test_markers = ['test', 'tests', 'e2e', 'testing']
    diff_text = requests.get(pull["diff_url"]).text

    fix_parts = []
    test_parts = []
    for patched_file in PatchSet(diff_text):
        looks_like_test = any(marker in patched_file.path for marker in test_markers)
        bucket = test_parts if looks_like_test else fix_parts
        bucket.append(str(patched_file))
    return "".join(fix_parts), "".join(test_parts)
5722
5723
5724### MARK: Repo Specific Parsing Functions ###
def extract_problem_statement_and_hints_django(
    pull: dict, repo: Repo
) -> tuple[str, list[str]]:
    """
    Get problem statement and hints from the Django ticket tracker.

    Each number in pull["resolved_issues"] is fetched from
    code.djangoproject.com; tickets that do not answer with HTTP 200 are
    skipped.

    Args:
        pull (dict): PR dictionary object from GitHub
        repo (Repo): Repo object
    Return:
        text (str): problem statement ("<title>\\n<body>\\n" per ticket)
        hints (list): (comment_text, timestamp) tuples for ticket comments
            dated before the PR's first commit
    """
    problem_text = ""
    hint_entries = list()
    for issue_number in pull["resolved_issues"]:
        resp = requests.get(f"https://code.djangoproject.com/ticket/{issue_number}")
        if resp.status_code != 200:
            continue
        soup = BeautifulSoup(resp.text, "html.parser")

        # Get problem statement (title + body)
        ticket = soup.find("div", {"id": "ticket"})
        raw_title = ticket.find("h1", class_="searchable").get_text()
        title = re.sub(r"\s+", " ", raw_title).strip()
        body = ticket.find("div", class_="description").get_text()
        # Collapse blank lines, turn 4-space runs into tabs, squeeze the rest.
        body = re.sub(r"\n+", "\n", body)
        body = re.sub(r"    ", "\t", body)
        body = re.sub(r"[ ]{2,}", " ", body).strip()
        problem_text += f"{title}\n{body}\n"

        # Get time of first commit in PR
        commits = list(
            repo.get_all_loop(
                repo.api.pulls.list_commits, pull_number=pull["number"], quiet=True
            )
        )
        if len(commits) == 0:
            continue
        first_commit_date = commits[0].commit.author.date
        commit_time = time.mktime(
            time.strptime(first_commit_date, "%Y-%m-%dT%H:%M:%SZ")
        )

        # Get all comments before first commit
        changelog = soup.find("div", {"id": "changelog"})
        for change in changelog.find_all("div", class_="change"):
            # Find the comment text and timestamp
            comment_node = change.find("div", class_="comment")
            timeline_node = change.find("a", class_="timeline")
            if comment_node is None or timeline_node is None:
                continue

            comment_text = re.sub(r"\s+", " ", comment_node.text).strip()
            stamp = timeline_node["title"]
            if stamp.startswith("See timeline at "):
                stamp = stamp[len("See timeline at ") :]
            # Two timestamp layouts are recognized; anything else is an error.
            if "/" in stamp:
                stamp_format = "%m/%d/%y %H:%M:%S"
            elif "," in stamp:
                stamp_format = "%b %d, %Y, %I:%M:%S %p"
            else:
                raise ValueError(f"Timestamp format not recognized: {stamp}")
            posted_at = time.mktime(time.strptime(stamp, stamp_format))

            # Keep the comment and its timestamp only if it predates the commit
            if posted_at < commit_time:
                hint_entries.append((comment_text, posted_at))

    return problem_text, hint_entries
5794
5795
5796
5797---
5798File: /coding/miners/finetune.py
5799---
5800
5801from coding.protocol import HFModelSynapse
5802
def miner_process(self, synapse: HFModelSynapse) -> HFModelSynapse:
    """
    Handle one incoming request: fill in the model submission fields.

    The synapse is modified in place and returned with ``model_name`` and
    ``competition_id`` set to this miner's fixed values.
    """
    submission = (
        ("model_name", "microsoft/Phi-3-mini-128k-instruct"),
        ("competition_id", 1),
    )
    for field, value in submission:
        setattr(synapse, field, value)
    return synapse
5811
5812
5813
5814---
5815File: /coding/miners/openai_miner.py
5816---
5817
5818import os
5819import time
5820import bittensor as bt
5821from starlette.types import Send
5822from functools import partial
5823from typing import Dict, Awaitable
5824from langchain_openai import OpenAI
5825from dotenv import load_dotenv, find_dotenv
5826from langchain.prompts import PromptTemplate
5827from langchain_core.output_parsers import StrOutputParser
5828from langchain_core.runnables.base import RunnableSequence
5829
5830from coding.protocol import StreamCodeSynapse
5831
5832
def miner_init(self):
    """
    One-time miner setup: load a .env file and create the OpenAI model client.

    The API key is read from the ``OPENAI_API_KEY`` environment variable and
    falls back to the string "EMPTY" when unset.
    """
    load_dotenv(find_dotenv())
    # Set openai key and other args
    openai_settings = {
        "api_key": os.environ.get("OPENAI_API_KEY", "EMPTY"),
        "model_name": self.config.neuron.model_id,
        "max_tokens": 2048,
        "temperature": 0.7,
    }
    self.model = OpenAI(**openai_settings)
5846
def miner_process(self, synapse: StreamCodeSynapse) -> Awaitable:
    """
    Handle one incoming request by streaming the model's completion.

    The synapse's query is passed through a prompt | model | string-parser
    chain, and the tokens are streamed back through the synapse's streaming
    response.
    """
    async def _forward(
        self,
        query: str,
        init_time: float,
        timeout_threshold: float,
        chain: RunnableSequence,
        chain_formatter: Dict[str, str],
        send: Send,
    ):
        """Send tokens from ``chain`` in UTF-8 encoded batches until done or timed out."""
        pending = []
        temp_completion = ""  # for wandb logging
        timed_out = False

        def _message(text, more):
            # One HTTP body message carrying `text` as UTF-8 bytes.
            return {
                "type": "http.response.body",
                "body": text.encode("utf-8"),
                "more_body": more,
            }

        try:
            # Langchain built in streaming. 'astream' also available for async
            for token in chain.stream(chain_formatter):
                pending.append(token)

                if time.time() - init_time > timeout_threshold:
                    bt.logging.debug("⏰ Timeout reached, stopping streaming")
                    timed_out = True
                    break

                if len(pending) != self.config.neuron.streaming_batch_size:
                    continue

                chunk = "".join(pending)
                temp_completion += chunk
                bt.logging.debug(f"Streamed tokens: {chunk}")
                await send(_message(chunk, True))
                pending = []

            # Don't send the last buffer of data if timeout.
            if pending and not timed_out:
                await send(_message("".join(pending), False))

        except Exception as e:
            bt.logging.error(f"Error in forward: {e}")
            if self.config.neuron.stop_on_forward_exception:
                self.should_exit = True

    bt.logging.debug(f"📧 Query received, forwarding synapse: {synapse}")

    # The prompt is the raw query with no additional template text.
    chain = PromptTemplate.from_template("{query}") | self.model | StrOutputParser()
    query = synapse.query

    # Everything except `send` is bound here; the streaming response supplies it.
    token_streamer = partial(
        _forward,
        self,
        query,
        time.time(),
        synapse.timeout,
        chain,
        {"query": query},
    )
    return synapse.create_streaming_response(token_streamer)
5929
5930
5931---
5932File: /coding/miners/qwen_mistral_miner.py
5933---
5934
5935import json
5936import time
5937import traceback
5938import bittensor as bt
5939from typing import Awaitable
5940from functools import partial
5941from langchain.prompts import PromptTemplate
5942from langchain_openai import OpenAI, ChatOpenAI
5943from coding.protocol import StreamCodeSynapse
5944from coding.helpers import chain_forward, string_forward
5945
5946
def parse_diff(diff_string):
    """
    Split a git diff into the hunk lines of each file.

    Args:
        diff_string (str): Output in ``git diff`` format.

    Returns:
        dict: Maps the last token of each "diff --git" line (e.g. "b/x.py")
        to that file's hunk lines joined by newlines. "@@" hunk headers and
        the file header lines before the first hunk are not included. Files
        without any hunk lines are omitted.
    """
    file_diffs = {}
    current_file = None
    diff_content = []
    in_hunk = False

    def _flush():
        if current_file and diff_content:
            file_diffs[current_file] = "\n".join(diff_content)

    for line in diff_string.splitlines():
        # Inside a hunk every content line starts with " ", "+" or "-", so
        # only a line that starts with the marker begins a new file there.
        # Outside a hunk the marker is accepted anywhere in the line.
        starts_new_file = line.startswith("diff --git") or (
            not in_hunk and "diff --git" in line
        )
        if starts_new_file:
            _flush()
            current_file = line.split()[-1]
            diff_content = []
            in_hunk = False
        elif line.startswith("@@"):
            in_hunk = True
        elif in_hunk:
            # Keep every hunk line, including removed/added lines whose own
            # text begins with "--" / "++" (they appear as "---…" / "+++…").
            diff_content.append(line)
        # Anything else is a file header line ("index", "---", "+++") before
        # the first hunk and is ignored.

    _flush()
    return file_diffs
5974
5975
def miner_init(self):
    """
    One-time miner setup: install a model factory and create two chat clients.

    Sets ``self.model_factory``, ``self.model`` (chat client with the factory
    defaults) and ``self.mistral`` (chat client pointed at port 8001).
    """

    def model_factory(
        api_base="http://localhost:8000/v1",
        model_name=self.config.neuron.model_id,
        max_tokens=4096,
        temperature=0.7,
        top_p=1.0,
        chat=False,
    ):
        """Build a streaming ChatOpenAI (``chat=True``) or OpenAI client."""
        client_cls = ChatOpenAI if chat else OpenAI
        # Both client types receive exactly the same settings.
        return client_cls(
            openai_api_base=api_base,
            openai_api_key="EMPTY",
            model_name=model_name,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            streaming=True,
        )

    self.model_factory = model_factory

    self.model = model_factory(chat=True)
    mistral_settings = {
        "api_base": "http://localhost:8001/v1",
        "model_name": "thesven/Mistral-7B-Instruct-v0.3-GPTQ",
        "chat": True,
    }
    self.mistral = model_factory(**mistral_settings)
6017
6018
def miner_process(self, synapse: StreamCodeSynapse) -> Awaitable:
    """
    Route an incoming request to a model and return a streaming response.

    The shape of the request selects the model and the prompt, checked in this
    order:

    1. Query contains ``<|fim_hole|>`` and there are no files: fill-in-the-middle
       prompt on a fresh completion (non-chat) model, with stop sequences.
    2. Messages and files: chat on ``self.model``; the files (braces doubled)
       plus the query are appended as a final user message.
    3. Messages only: chat on ``self.model``; the first message's role is
       forced to ``"user"``.
    4. Query contains the "The following issue is:" marker: SWE-Bench style
       task. ``self.mistral`` is invoked synchronously, its patch is parsed
       with ``parse_diff`` and the JSON result is streamed back directly.
    5. Files and ``<|fim_hole|>``: fill-in-the-middle prompt prefixed with the
       files, on a fresh completion model.
    6. Query contains "write code to": ``self.mistral`` with the files
       prepended to the query.
    7. Anything else: the raw query on ``self.model``.

    Args:
        synapse: Incoming request; ``query``, ``messages``, ``files`` and
            ``timeout`` are read from it.

    Returns:
        Whatever ``synapse.create_streaming_response`` returns for the
        selected streamer.
    """
    extra_info = {}
    stop = None
    chain = None
    chain_formatter = None

    bt.logging.debug(f"📧 Query received, forwarding synapse: {synapse}")
    if "<|fim_hole|>" in synapse.query and not synapse.files:
        chain = self.model_factory(chat=False)
        chain_formatter = f"<fim_prefix>{synapse.query.replace('<|fim_hole|>', '<fim_suffix>')}<fim_middle>"
        stop = [
            "<fim_prefix>",
            "<fim_suffix>",
            "<fim_middle>",
            "//",
            "<|end▁of▁sentence|>",
            "\n\n",
            "\r\n\r\n",
            "/src/",
            "#- coding: utf-8",
            "```",
            "\ndef",
            "\nclass",
            '\n"""#',
        ]
    elif synapse.messages and synapse.files:
        chain = self.model
        # Must start empty: the loop below appends to it.
        filestring = ""
        for file in synapse.files:
            # Braces are doubled so the content survives later template formatting.
            file.content = file.content.replace("}", "}}").replace("{", "{{")
            filestring += f"#{file.path}\n{file.content}\n"
        chain_formatter = synapse.messages + [
            {"role": "user", "content": f"{filestring}\n{synapse.query}"}
        ]
    elif synapse.messages:
        chain = self.model
        synapse.messages[0].role = "user"
        chain_formatter = [msg.dict() for msg in synapse.messages]
    elif "The following issue is:\n\n" in synapse.query:
        # this is a SWE-Bench style task
        prompt = synapse.query + "\n"
        for file in synapse.files or []:
            prompt += f"#Filename: {file.path}\n{file.content}\n"
        prompt += "Respond only with the patch, only modify the files you have been provided."
        # NOTE(review): the "a/" / "b/" replacements hit every occurrence in
        # the response, not only the diff path prefixes - confirm this is
        # acceptable for paths such as "data/x.py".
        model_res = (
            self.mistral.invoke([{"role": "user", "content": prompt[0:15000]}])
            .content.replace("<patch>", "")
            .replace("</patch>", "")
            .replace("b/", "")
            .replace("a/", "")
        )
        if "```" in model_res:
            # Keep only what follows the first code fence.
            model_res = model_res.split("```")[1]
        model_res = json.dumps(parse_diff(model_res))
        return synapse.create_streaming_response(partial(string_forward, model_res))
    elif synapse.files and "<|fim_hole|>" in synapse.query:
        chain = self.model_factory(chat=False)
        string = ""
        for file in synapse.files:
            # NOTE(review): if `file` is a pydantic model, this membership test
            # compares against (field, value) pairs and is always true, which
            # blanks every path - confirm the intended check.
            if "path" not in file:
                file.path = ""
            string += f"<file_sep>{file.path}\n{file.content}\n"
        chain_formatter = (
            string
            + "<fim_prefix>"
            + synapse.query.replace("<|fim_hole|>", "<fim_middle>")
        )
    elif "write code to" in synapse.query:
        string = ""
        chain = self.mistral
        for file in synapse.files or []:
            # NOTE(review): same membership-test caveat as in the branch above.
            if "path" not in file:
                file.path = ""
            string += f"{file.path}\n{file.content}\n"
        if string:
            # Previously a bare string expression with no effect; the
            # instruction is now actually appended after the files.
            string += "Using the above files, and responding only with python code \n"
        chain_formatter = string + synapse.query
    else:
        chain = self.model
        chain_formatter = synapse.query

    if not chain:
        prompt = PromptTemplate.from_template("{query}")
        chain = prompt | self.model
    if stop:
        # Bind the stop sequences to the chain used for this request only.
        # Rebinding self.model would leave the FIM stop tokens on the shared
        # chat model for every later request and would not affect `chain`.
        chain = chain.bind(stop=stop)

    init_time = time.time()
    timeout_threshold = synapse.timeout

    streamer = partial(
        chain_forward,
        self,
        synapse.query,
        synapse.files,
        extra_info,
        init_time,
        timeout_threshold,
        chain,
        chain_formatter,
    )
    return synapse.create_streaming_response(streamer)
6127
6128
6129
6130---
6131File: /coding/miners/swe.py
6132---
6133
6134import os
6135from coding.protocol import LogicSynapse
6136
def miner_process(self, synapse: LogicSynapse) -> LogicSynapse:
    """
    Handle a request by attaching the submission files to the synapse.

    Walks ``test_submission_dir`` recursively, reads every file as latin-1
    text and stores it in a dict keyed by the path relative to that
    directory. ``__pycache__`` directories are not descended into. The dict is
    assigned to ``synapse.logic`` and the same synapse is returned.

    As written, ``test_submission_dir`` is an empty string, so the walk yields
    nothing and ``synapse.logic`` ends up empty until a real path is set.
    """
    test_submission_dir = ""
    collected = {}

    for current_dir, subdirs, filenames in os.walk(test_submission_dir):
        # Prune in place so os.walk does not descend into bytecode caches.
        if '__pycache__' in subdirs:
            subdirs.remove('__pycache__')

        relative_dir = os.path.relpath(current_dir, test_submission_dir)

        for filename in filenames:
            if '__pycache__' in filename:
                continue

            # Files directly under the root are keyed by bare filename.
            if relative_dir == '.':
                key = filename
            else:
                key = os.path.join(relative_dir, filename)

            with open(os.path.join(current_dir, filename), 'r', encoding='latin-1') as handle:
                collected[key] = handle.read()

    synapse.logic = collected
    return synapse
6170
6171
6172
6173---
6174File: /coding/rewards/__init__.py
6175---
6176
6177from .reward import (
6178    BaseRewardModel,
6179    RewardResult,
6180    RewardEvent,
6181    BatchRewardOutput,
6182    RewardModelTypeEnum,
6183)
6184# from .pipeline import RewardPipeline
6185from .diffsim import DiffSimModel
6186from .codesim import CodeSimModel
6187from .speed import SpeedModel
6188from .validcode import ValidCodeModel
6189
6190
6191
6192---
6193File: /coding/rewards/codesim.py
6194---
6195
6196import time
6197import numpy as np
6198from typing import List
6199from code_bert_score import BERTScorer
6200
6201from .reward import (
6202    BaseRewardModel,
6203    BatchRewardOutput,
6204    RewardModelTypeEnum,
6205)
6206from coding.helpers.cosine import normalize_cosim
6207
6208
class CodeSimModel(BaseRewardModel):
    """Reward model that scores completions against a reference using the F1
    value returned by a BERTScorer-style ``score(candidates, references)``."""

    @property
    def name(self) -> str:
        return "codesim"

    def __init__(self, code_scorer=None, **kwargs):
        """
        Args:
            code_scorer: Scorer exposing ``score(cands, refs) -> (P, R, F1)``.
                When None, a ``BERTScorer(lang="python")`` is created.
            **kwargs: Accepted and ignored so all reward models can be built
                with the same keyword set.
        """
        super().__init__()
        if code_scorer is None:
            self.code_scorer = BERTScorer(lang="python")
        else:
            self.code_scorer = code_scorer

    def similarity(self, reference: str, completion: str) -> float:
        """Score one completion against one reference; 0 if either is empty."""
        if not reference or not completion:
            return 0
        P, R, F1 = self.code_scorer.score([completion], [reference])
        # Clamp exactly like similarity_batch so both paths agree.
        score = min(F1.tolist()[0], 1.0)
        return normalize_cosim(score)

    def similarity_batch(self, references: str|list, completions: List[str]) -> List[float]:
        """
        Score many completions in a single scorer call.

        Args:
            references: One reference shared by all completions, or a list
                with one reference per completion.
            completions: Candidate strings; empty/None entries score 0 and are
                not sent to the scorer.

        Returns:
            One score per entry of ``completions``, in the same order.
        """
        total = len(completions) if completions else 0
        if not references or not completions:
            return [0] * total

        # Keep only non-empty completions, remembering where they came from.
        kept = [(idx, comp) for idx, comp in enumerate(completions) if comp]
        if not kept:
            return [0] * total

        indices, filtered_completions = zip(*kept)

        if not isinstance(references, list):
            references = [references] * len(filtered_completions)
        elif len(references) == len(completions):
            # Per-completion references must be filtered the same way as the
            # completions, otherwise the pairs are misaligned (and the lengths
            # differ) as soon as one completion is empty.
            references = [references[idx] for idx in indices]

        # Pass a list (zip yields a tuple) so both arguments are the same
        # sequence type.
        P, R, F1 = self.code_scorer.score(list(filtered_completions), references)
        scores = F1.tolist()

        # Skipped completions keep the default score of 0.
        result_scores = [0] * total
        for idx, score in zip(indices, scores):
            if score > 1:
                score = 1.0
            result_scores[idx] = normalize_cosim(score)

        return result_scores

    def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput:
        """
        Get the score between a reference string and a list of completion strings.
        """

        t0 = time.time()
        rewards = self.similarity_batch(reference, completions)
        total_time = time.time() - t0
        # The batch is scored in one call, so every completion is assigned the
        # total batch time.
        timings = [total_time] * len(rewards)

        output = BatchRewardOutput(
            rewards=rewards,
            timings=timings,
            extra_info={}
        )

        return output
6277
6278
6279---
6280File: /coding/rewards/debugrun.py
6281---
6282
6283
6284
6285
6286---
6287File: /coding/rewards/diffsim.py
6288---
6289
6290import time
6291import difflib
6292from typing import List
6293from .reward import (
6294    BaseRewardModel,
6295    BatchRewardOutput,
6296    RewardModelTypeEnum,
6297)
6298from coding.helpers.cosine import normalize_cosim
6299
6300
class DiffSimModel(BaseRewardModel):
    """Reward model based on ``difflib.SequenceMatcher`` similarity between a
    reference string and each completion."""

    @property
    def name(self) -> str:
        return "diffsim"

    def __init__(self, **kwargs):
        """
        Args:
            **kwargs: Accepted and ignored. The reward pipeline constructs
                every model with ``device=...`` and ``code_scorer=...``, and
                the sibling models already accept arbitrary keywords.
        """
        super().__init__()

    def similarity(self, reference: str, completion: str) -> float:
        """Return the normalized match ratio, or 0 when there is nothing to compare."""
        if reference is None or not completion:
            # SequenceMatcher cannot take None; treat it like a missing completion.
            return 0
        sequence_matcher = difflib.SequenceMatcher(None, reference, completion)
        score = sequence_matcher.ratio()
        return normalize_cosim(score)

    def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput:
        """
        Score every completion against the reference.

        Returns:
            BatchRewardOutput with one reward and one wall-clock timing per
            completion.
        """
        rewards = []
        timings = []
        for completion in completions:
            t0 = time.time()
            rewards.append(self.similarity(reference, completion))
            timings.append(time.time() - t0)
        output = BatchRewardOutput(
            rewards=rewards,
            timings=timings,
            extra_info={}
        )

        return output
6333
6334
6335
6336---
6337File: /coding/rewards/pipeline.py
6338---
6339
6340# The MIT License (MIT)
6341# Copyright © 2024 Yuma Rao
6342# Copyright © 2023 Opentensor Foundation
6343# Copyright © 2024 Macrocosmos
6344# Copyright © 2024 Brokespace
6345
6346
6347# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6348# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
6349# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
6350# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6351
6352# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
6353# the Software.
6354
6355# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
6356# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
6357# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
6358# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
6359# DEALINGS IN THE SOFTWARE.
6360
6361from typing import List, Callable, Any
6362
6363from coding.tasks import TASKS
6364from .reward import BaseRewardModel
6365from .codesim import CodeSimModel
6366from .speed import SpeedModel
6367from .diffsim import DiffSimModel
6368from .validcode import ValidCodeModel
6369
# Registry of reward-model name -> class. "codesim" is keyed by a literal, so
# CodeSimModel is not instantiated at import time; the other models are
# instantiated once here only to read their `name`. "self" has no class: the
# pipeline handles that name specially.
REWARD_MODELS = {
    "codesim": CodeSimModel,
    **{
        model_cls().name: model_cls
        for model_cls in (DiffSimModel, SpeedModel, ValidCodeModel)
    },
    "self": None,
}
6377
6378
class RewardPipeline:
    """Holds one instantiated reward model for each model name required by the
    reward and penalty definitions of the selected tasks."""

    def __init__(self, selected_tasks: List[str], device: str, code_scorer):
        """
        Args:
            selected_tasks: Task names; each must be a key of ``TASKS``.
            device: Forwarded to every reward model constructor.
            code_scorer: Forwarded to every reward model constructor.

        Raises:
            ValueError: If a task or reward model is unknown, or a task's
                weight definitions are malformed.
        """
        self.selected_tasks = selected_tasks
        self.device = device
        self.code_scorer = code_scorer
        self.validate_tasks()
        self.load_reward_pipeline()

    def __getitem__(self, __key: str) -> BaseRewardModel:
        # Unlike a dict, a missing key yields None rather than KeyError.
        return self.reward_models.get(__key)

    def get(self, __key: str) -> BaseRewardModel:
        return self.reward_models.get(__key)

    def keys(self) -> List[str]: #TODO this might not be the right return type
        return self.reward_models.keys()

    def __repr__(self):
        return f"RewardPipeline({self.reward_models})"

    def validate_tasks(self):
        """Check that every selected task exists and has well-formed weights."""
        for task in self.selected_tasks:
            if task not in TASKS:
                raise ValueError(
                    f"Task {task} not supported. Please choose from {TASKS.keys()}"
                )
            # Reward weights must sum to one; penalty weights are only range-checked.
            self._check_weights(task, "reward_definition", expected_weight=1)
            self._check_weights(task, "penalty_definition", expected_weight=None)

    def _check_weights(self, task, definition, expected_weight):
        """
        Validate the list of model dicts stored on ``TASKS[task].<definition>``.

        Each entry must be a dict with a numeric ``weight`` in [0, 1]. When
        ``expected_weight`` is not None and the list is non-empty, the weights
        must sum to it (within floating-point tolerance).

        Raises:
            ValueError: On the first violated rule.
        """
        import math

        total_weight = 0

        model_infos = getattr(TASKS[task], definition)

        for model_info in model_infos:
            if not isinstance(model_info, dict):
                raise ValueError(
                    f"{definition} model {model_info} is not a dictionary."
                )
            if "weight" not in model_info:
                raise ValueError(
                    f"{definition} model {model_info} does not have a weight."
                )

            weight = model_info["weight"]
            if not isinstance(weight, (float, int)):
                raise ValueError(
                    f"{definition} model {model_info} weight is not a float."
                )
            if not 0 <= weight <= 1:
                raise ValueError(
                    f"{definition} model {model_info} weight is not between 0 and 1."
                )

            total_weight += weight

        # Compare with a tolerance: sums such as 0.1 + 0.2 + 0.7 are not
        # exactly 1.0 in binary floating point and would fail an exact check.
        if (
            model_infos
            and expected_weight is not None
            and not math.isclose(total_weight, expected_weight, rel_tol=1e-9, abs_tol=1e-9)
        ):
            raise ValueError(
                f"{definition} model {model_infos} weights do not sum to {expected_weight} (sum={total_weight})"
            )

    def load_reward_pipeline(self):
        """Dynamically loads the reward models required by the selected tasks so that we only use the necessary resources."""
        active_reward_models = []

        for task in self.selected_tasks:
            active_reward_models += TASKS[task].reward_definition
            active_reward_models += TASKS[task].penalty_definition

        # Instantiate only the required reward models
        reward_models = {}
        for model in active_reward_models:
            name = model.get("name")
            if not name:
                raise ValueError(f"Reward model {model} does not have a name. ")
            if name not in REWARD_MODELS:
                raise ValueError(
                    f"Reward model {name} not supported. Please choose from {REWARD_MODELS.keys()}"
                )
            if name in reward_models:  # Prevents duplicate reward models
                continue

            if name == "self":
                # Marker value: the task scores itself, no model object is built.
                reward_models[name] = "self"
                continue

            cls = REWARD_MODELS[name]
            # Everything except name/weight is passed through as a constructor argument.
            params = {k: v for k, v in model.items() if k not in ["name", "weight"]}
            reward_models[name] = cls(device=self.device, code_scorer=self.code_scorer, **params)

        self.reward_models = reward_models
6474
6475
6476---
6477File: /coding/rewards/reward.py
6478---
6479
6480# The MIT License (MIT)
6481# Copyright © 2024 Yuma Rao
6482# Copyright © 2023 Opentensor Foundation
6483# Copyright © 2024 Macrocosmos
6484# Copyright © 2024 Broke
6485
6486
6487# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6488# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
6489# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
6490# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6491
6492# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
6493# the Software.
6494
6495# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
6496# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
6497# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
6498# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
6499# DEALINGS IN THE SOFTWARE.
6500
6501import time
6502import numpy as np
6503from enum import Enum
6504from abc import ABC, abstractmethod
6505from dataclasses import dataclass
6506from typing import List, Any, Union, Dict
6507
class RewardModelTypeEnum(Enum):
    """How a reward model's output is used when results are combined.

    The member value is also embedded in the key names produced by
    ``RewardEvent.asdict``.
    """
    # Weighted and summed into the total reward (see RewardResult.total_reward).
    WEIGHTED_REWARD = "reward"
    # Not referenced by the reward code visible in this module.
    FILTER_REWARD = "filter"
    # Scales the total reward down multiplicatively (see RewardResult.total_reward).
    PENALTY = "penalty"
6512
@dataclass
class RewardEvent(ABC):
    """Output of one reward model for every response in a batch."""

    model_name: str
    rewards: Any
    rewards_normalized: Any
    timings: Any
    model_type: RewardModelTypeEnum
    batch_time: float
    extra_info: dict

    # Custom flattening: every key is prefixed with the model name and the
    # model type so events from different models can be merged into one dict.
    def asdict(self) -> dict:
        kind = self.model_type.value
        prefix = f"{self.model_name}_{kind}"
        flattened = {f"{self.model_name}_raw_{kind}": self.rewards.tolist()}
        flattened[prefix] = self.rewards_normalized
        flattened[f"{prefix}_timings"] = self.timings
        flattened[f"{prefix}_batch_time"] = self.batch_time
        flattened[f"{prefix}_extra_info"] = self.extra_info
        return flattened
6534
6535
class RewardResult:
    """Runs a task's reward and penalty models over a batch of responses and
    combines their outputs into one reward per response."""

    def __init__(self, reward_pipeline, task, response_event, device):
        """Score the responses with every model the task asks for.

        Args:
            reward_pipeline (RewardPipeline): Loaded/active reward models, looked up by name
            task (Task): Provides reward_definition, penalty_definition and the reference answer
            response_event (DendriteResponseEvent): Network responses to the prompt
            device (str): Device to run the reward models on
        """
        self.reward_pipeline = reward_pipeline
        self.task = task
        self.response_event = response_event
        self.device = device
        self.task_rewards = task.reward_definition
        self.task_penalties = task.penalty_definition

        # Rewards first, then penalties; both use the same reference and task.
        self.reward_events = self.reward_responses(
            reference=task.reference,
            models=self.task_rewards,
            reward_type=RewardModelTypeEnum.WEIGHTED_REWARD,
            task=task,
        )
        self.penalty_events = self.reward_responses(
            reference=task.reference,
            models=self.task_penalties,
            reward_type=RewardModelTypeEnum.PENALTY,
            task=task,
        )
        self.rewards = self.total_reward()

    def __state_dict__(self):
        """Flatten the combined rewards and every event into one dict."""
        combined = self.rewards
        state = {
            "rewards": combined.tolist(),
            "highest_reward": combined.max(),
            "average_reward": combined.mean(),
            "task_name": self.task.name,
        }
        for event in (*self.reward_events, *self.penalty_events):
            state.update(event.asdict())
        return state

    def reward_responses(
        self, reference: Union[str, List[str], Dict], models: List[dict], reward_type: RewardModelTypeEnum, task
    ) -> List[RewardEvent]:
        """Apply each listed model to the responses and collect one event per model.

        When ``reference`` is a dict, the entry stored under the model's name
        is used as that model's reference. A pipeline entry equal to "self"
        delegates scoring to ``self.task.reward_apply``.
        """
        per_model_reference = isinstance(reference, dict)
        events = []
        for info in models:
            model_name = info["name"]
            # Look the model up in the preloaded pipeline.
            model = self.reward_pipeline.get(model_name)
            if not model:
                raise ValueError(
                    f"Reward model {info['name']} not supported. Please choose from {self.reward_pipeline.keys()}"
                )

            ref = reference.get(model_name) if per_model_reference else reference

            if model == "self":
                event = self.task.reward_apply(self.response_event, reward_type=reward_type)
            else:
                event = model.apply(
                    ref, self.response_event, reward_type=reward_type, task=task
                )
            events.append(event)

        return events

    def total_reward(self):
        """Weighted sum of the reward events, then scaled by each penalty."""
        rewards = np.zeros_like(self.response_event.uids, dtype=np.float64)

        # Additive part: weight * rewards for every definition matching the event.
        for event in self.reward_events:
            for info in self.task_rewards:
                if info["name"] != event.model_name:
                    continue
                rewards += info["weight"] * event.rewards

        # Multiplicative part: each penalty shrinks the total by weight * penalty.
        for event in self.penalty_events:
            for info in self.task_penalties:
                if info["name"] != event.model_name:
                    continue
                rewards *= 1 - info["weight"] * event.rewards

        return rewards

    def __str__(self):
        return f"{self.__class__.__name__}(rewards={self.rewards!r}, reward_events={self.reward_events!r}, penalty_events={self.penalty_events!r})"
6620
@dataclass
class BatchRewardOutput():
    """Rewards and timings produced by one reward model for a batch.

    After construction, ``rewards`` and ``timings`` are numpy arrays and
    ``rewards_normalized`` is a plain list holding the min-max scaled rewards.

    Raises:
        ValueError: If ``rewards`` and ``timings`` have different shapes.
    """
    rewards: Any
    timings: Any
    extra_info: dict

    def __post_init__(self):
        self.rewards = np.asarray(self.rewards)
        self.timings = np.asarray(self.timings)
        if self.rewards.shape != self.timings.shape:
            raise ValueError(
                f"rewards.shape {self.rewards.shape} != timings.shape {self.timings.shape}"
            )

        if self.rewards.size == 0:
            # min()/max() raise on a zero-size array; an empty batch simply
            # has no normalized rewards.
            self.rewards_normalized = []
            return

        # Min-max scaling; the small epsilon avoids dividing by zero when all
        # rewards are equal.
        self.rewards_normalized = (self.rewards - self.rewards.min()) / (
            self.rewards.max() - self.rewards.min() + 1e-6
        )
        self.rewards_normalized = self.rewards_normalized.tolist()
6639
6640
class BaseRewardModel(ABC):
    """Common interface for reward models: subclasses provide ``name`` and
    ``reward``; ``apply`` wraps a ``reward`` call into a RewardEvent."""

    @property
    @abstractmethod
    def name(self) -> str:
        ...

    @abstractmethod
    def __init__(self, **kwargs):
        pass

    @abstractmethod
    def reward(self, reference: str, completions: List[str]) -> BatchRewardOutput:
        pass

    def apply(self, reference: str, response_event, reward_type, task) -> RewardEvent:
        """Run ``reward`` with the arguments this model expects and time the call.

        The arguments depend on ``self.name``:
          * "speed": the response timings only.
          * "validcode": the task context content, the completions (each
            substituted into the query's ``<|fim_hole|>`` when the query has
            one) and the context topic.
          * anything else: the reference and the completions.
        """
        started = time.time()
        model_name = self.name

        if model_name == "speed":
            batch = self.reward(response_event.timings)
        elif model_name == "validcode":
            if "<|fim_hole|>" in task.query:
                # Validate the whole snippet, i.e. the query with the hole filled in.
                candidates = [
                    task.query.replace("<|fim_hole|>", completion)
                    for completion in response_event.completions
                ]
            else:
                candidates = response_event.completions
            batch = self.reward(task.context.content, candidates, task.context.topic)
        # TODO remove: a disabled "debugrun" branch used to call self.reward(task, response_event)
        else:
            batch = self.reward(reference, response_event.completions)

        elapsed = time.time() - started

        return RewardEvent(
            model_name=self.name,
            rewards=batch.rewards,
            rewards_normalized=batch.rewards_normalized,
            model_type=reward_type,
            batch_time=elapsed,
            extra_info=batch.extra_info,
            timings=batch.timings,
        )

    def __repr__(self):
        return f"{self.__class__.__name__}(name={self.name})"
6682
6683
6684
6685---
6686File: /coding/rewards/speed.py
6687---
6688
6689import math
6690import time
6691from typing import List
6692from .reward import (
6693    BaseRewardModel,
6694    BatchRewardOutput,
6695)
6696
6697
class SpeedModel(BaseRewardModel):
    """Reward model that scores each response by how long it took."""

    @property
    def name(self) -> str:
        return "speed"

    def __init__(self, ideal_time: float = 12.0, decay_rate: float = 1.0, **kwargs):
        super().__init__()
        self.ideal_time = ideal_time
        self.decay_rate = decay_rate

    def score_time(self, time_taken: float) -> float:
        """
        Map a duration to a score in [0, 1].

        Durations up to ``self.ideal_time`` score 1; beyond it the score
        decays exponentially at ``self.decay_rate``.

        :param time_taken: Time taken for the event in seconds.
        :return: Score between 0 and 1.
        :raises ValueError: If ``time_taken`` or ``self.ideal_time`` is not positive.
        """
        if time_taken <= 0 or self.ideal_time <= 0:
            raise ValueError("Time taken and ideal time must be positive values.")

        # Exponent is positive for fast responses, negative for slow ones.
        exponent = -self.decay_rate * (time_taken - self.ideal_time) / self.ideal_time
        decayed = math.exp(exponent)

        # Fast responses would exceed 1, so clamp into [0, 1].
        return max(0, min(1, decayed))

    def reward(self, times) -> BatchRewardOutput:
        """Score every response time and record how long each scoring took."""
        scores, elapsed = [], []

        for response_time in times:
            started = time.time()
            scores.append(self.score_time(response_time))
            elapsed.append(time.time() - started)

        return BatchRewardOutput(
            rewards=scores,
            timings=elapsed,
            extra_info={"ideal_time": self.ideal_time},
        )
6745
6746
6747---
6748File: /coding/rewards/validcode.py
6749---
6750
6751import ast
6752import time
6753import autopep8
6754from typing import List
6755from .reward import (
6756    BaseRewardModel,
6757    BatchRewardOutput,
6758    RewardModelTypeEnum,
6759)
6760
def fix_python_spacing(code_str):
    """Return ``code_str`` as reformatted by ``autopep8.fix_code``."""
    return autopep8.fix_code(code_str)
6764
6765
class ValidCodeModel(BaseRewardModel):
    """Penalty-style model: a completion that does not parse as Python gets
    0.6, everything else gets 0."""

    @property
    def name(self) -> str:
        return "validcode"

    def __init__(self, **kwargs):
        super().__init__()

    def score(self, reference: str, completions: List[str], language: str) -> List[float]:
        """
        Score each completion by whether it parses as Python.

        Args:
            reference: Reference code. If it is not itself parseable, nothing
                is scored and every completion gets 0.
            completions: Candidate code strings.
            language: Only "Python" is scored; any other value yields zeros.

        Returns:
            One value per completion: 0.6 when the completion (after autopep8
            formatting) fails to parse, otherwise 0.
        """
        # Failures that mean "this text is not parseable Python". Besides
        # SyntaxError, parsing can raise ValueError (e.g. null bytes on some
        # Python versions) and RecursionError (deeply nested input).
        parse_errors = (SyntaxError, ValueError, RecursionError)

        if language != "Python":
            return [0] * len(completions)

        # The reference is the same for every completion, so check it once.
        try:
            ast.parse(reference)
        except parse_errors:
            return [0] * len(completions)  # Invalid reference code, so we dont score it

        scores = []
        for completion in completions:
            try:
                ast.parse(fix_python_spacing(completion))
            except parse_errors:
                scores.append(0.6)  # Invalid Python code
            else:
                scores.append(0)  # Valid Python code
        return scores

    def reward(self, reference: str, completions: List[str], language: str) -> BatchRewardOutput:
        """
        Wrap ``score`` into a BatchRewardOutput.
        """

        t0 = time.time()
        rewards = self.score(reference, completions, language)
        total_time = time.time() - t0
        timings = [total_time] * len(
            completions
        )  # Assuming equal distribution of time for each completion

        output = BatchRewardOutput(rewards=rewards, timings=timings, extra_info={})

        return output
6810
6811
6812
6813---
6814File: /coding/schemas/__init__.py
6815---
6816
6817from .context import Context
6818from .file import File
6819from .package import *
6820from .chat import *
6821from .swe import *
6822
6823
6824---
6825File: /coding/schemas/chat.py
6826---
6827
6828from strenum import StrEnum
6829from pydantic import BaseModel
6830
class ChatRole(StrEnum):
    """The role identifying who sent a chat message"""

    # Each member's value is the lowercase role string.
    SYSTEM = "system"
    ASSISTANT = "assistant"
    USER = "user"
6837
# A single chat message: the sender's role and the message text.
class ChatMessage(BaseModel):
    role: ChatRole  # who sent the message
    content: str  # message text
6841
6842
6843---
6844File: /coding/schemas/context.py
6845---
6846
6847# The MIT License (MIT)
6848# Copyright © 2024 Yuma Rao
6849# Copyright © 2023 Opentensor Foundation
6850# Copyright © 2024 Macrocosmos
6851# Copyright © 2024 Broke
6852
6853
6854# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
6855# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
6856# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
6857# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
6858
6859# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
6860# the Software.
6861
6862# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
6863# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
6864# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
6865# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
6866# DEALINGS IN THE SOFTWARE.
6867
6868from typing import List, Any, Dict
6869from pydantic import BaseModel
6870
6871from .file import File
6872from .chat import ChatMessage
6873
class Context(BaseModel):
    """
    Input handed to a task: a piece of content plus optional metadata.

    ``tags``, ``extras`` and ``files`` default to ``None`` and are therefore
    annotated as optional, so passing ``None`` explicitly is accepted as well.
    """

    title: str = ""
    topic: str = ""
    content: str = ""
    internal_links: List[str] = []
    external_links: List[str] = []
    source: str = ""
    tags: List[str] | None = None
    # Free-form additional data; the repo-level tasks read extras['sibling_docs'].
    extras: Dict[str, Any] | None = None
    files: List[File] | None = None
    messages: List[ChatMessage] = []
6885
6886
6887
6888---
6889File: /coding/schemas/file.py
6890---
6891
6892from pydantic import BaseModel
6893
class File(BaseModel):
    """A file represented by its path and its text content."""

    path: str
    content: str
6897
6898
6899---
6900File: /coding/schemas/model.py
6901---
6902
6903from pydantic import BaseModel
6904
class Model(BaseModel):
    """A model reference made of a name, a competition id and a block number."""

    model_name: str
    # prompt_tokens: dict
    # hash: str
    competition_id: int
    block: int

    def to_compressed_str(self) -> str:
        """Serialize as ``"<model_name>-<competition_id>-<block>"``."""
        return f"{self.model_name}-{self.competition_id}-{self.block}"

    @classmethod
    def from_compressed_str(cls, compressed_str: str) -> "Model":
        """
        Rebuild a Model from the output of ``to_compressed_str``.

        The string is split from the right so that hyphens inside the model
        name (e.g. ``"my-model-v2-3-100"``) stay part of ``model_name``.

        Raises:
            ValueError: If fewer than three hyphen-separated fields are present
                or the last two fields are not integers.
        """
        model_name, competition_id, block = compressed_str.rsplit("-", 2)
        return cls(model_name=model_name, competition_id=int(competition_id), block=int(block))
6920    
6921    
6922
6923
6924
6925---
6926File: /coding/schemas/package.py
6927---
6928
6929import re
6930import random
6931import string
6932from typing import List, Dict
6933from pydantic import BaseModel
6934
6935from .file import File
6936
class Package(BaseModel):
    """A collection of files that can be updated by path."""

    files: List[File]

    def update_file(self, new_file: File):
        """
        Replace the first stored file whose path equals ``new_file.path``.

        Raises:
            ValueError: If no stored file has that path.
        """
        position = next(
            (idx for idx, existing in enumerate(self.files) if existing.path == new_file.path),
            None,
        )
        if position is None:
            raise ValueError(f"File with path {new_file.path} not found in package.")
        self.files[position] = new_file
6946
6947
class ObscurePackage(Package):
    """
    Package whose file paths and class names can be replaced by random
    identifiers and later restored.

    After ``obscure_package`` has run, ``mapping`` holds ``original -> obscured``
    entries for both file paths and class names.
    """

    mapping: Dict = {}

    def obscure_package(self):
        """Obscure every file path and class name in place and store the mapping."""
        mapping = {}

        # Obscure file paths
        for file in self.files:
            new_path = self._generate_random_string(len(file.path)) + ".py"
            mapping[file.path] = new_path
            file.path = new_path

        # Obscure classes and contents. The mapping grows file by file, so each
        # file is rewritten with the entries collected up to and including itself.
        for file in self.files:
            file.content, class_mapping = self._obscure_classes(file.content)
            mapping.update(class_mapping)
            file.content = self._obscure_contents(file.content, mapping)

        self.mapping = mapping

    def undo_obscure_package(self):
        """Restore original paths and contents, then delete the stored mapping."""
        # NOTE(review): ``mapping`` is a declared field with a default, so this
        # check presumably only fails after a previous undo deleted it - confirm.
        if hasattr(self, 'mapping'):
            # Undo obscuring file paths
            reverse_mapping = {v: k for k, v in self.mapping.items()}
            for file in self.files:
                if file.path in reverse_mapping:
                    file.path = reverse_mapping[file.path]

            # Undo obscuring classes and contents
            for file in self.files:
                file.content = self._undo_obscure_contents(file.content, reverse_mapping)
                file.content = self._undo_obscure_classes(file.content, reverse_mapping)

            del self.mapping

    def obscure_string(self, script: str):
        """
        Obscure a standalone script using fresh class names plus the stored mapping.

        Raises:
            ValueError: If the instance has no ``mapping`` attribute.
        """
        if not hasattr(self, 'mapping'):
            raise ValueError("Package must be obscured before obscuring a script string.")

        script, class_mapping = self._obscure_classes(script)
        script = self._obscure_contents(script, self.mapping)
        script = self._obscure_contents(script, class_mapping)
        return script

    def undo_obscure_string(self, script: str):
        """
        Reverse the stored mapping on a standalone script.

        Raises:
            ValueError: If the instance has no ``mapping`` attribute.
        """
        if not hasattr(self, 'mapping'):
            raise ValueError("Package must be obscured before undoing obscuring a script string.")

        reverse_mapping = {v: k for k, v in self.mapping.items()}
        script = self._undo_obscure_contents(script, reverse_mapping)
        script = self._undo_obscure_classes(script, reverse_mapping)
        return script

    def _generate_random_string(self, length):
        """Return ``length`` random ASCII letters and digits."""
        return ''.join(random.choices(string.ascii_letters + string.digits, k=length))

    def _generate_random_class_name(self, length):
        """Return one random uppercase letter followed by ``length - 1`` lowercase letters."""
        return ''.join(random.choices(string.ascii_uppercase, k=1) + random.choices(string.ascii_lowercase, k=length-1))

    def _replace_word(self, contents, word, replacement):
        """Replace whole-word occurrences of ``word`` with ``replacement`` taken literally."""
        # A callable replacement keeps re.sub from interpreting backslashes or
        # group references that may occur in the replacement text (e.g. paths).
        return re.sub(r'\b' + re.escape(word) + r'\b', lambda _match: replacement, contents)

    def _obscure_contents(self, contents, mapping):
        """Apply every ``original -> obscured`` entry of ``mapping`` to ``contents``."""
        for original, obscure in mapping.items():
            contents = self._replace_word(contents, original, obscure)
        return contents

    def _undo_obscure_contents(self, contents, reverse_mapping):
        """Apply every ``obscured -> original`` entry of ``reverse_mapping`` to ``contents``."""
        for obscure, original in reverse_mapping.items():
            contents = self._replace_word(contents, obscure, original)
        return contents

    def _obscure_classes(self, contents):
        """
        Rename every class declared in ``contents`` to a random class name.

        Returns:
            tuple: The rewritten contents and the ``original -> obscured`` mapping.
        """
        class_pattern = r'\bclass\s+(\w+)'
        class_mapping = {}
        # De-duplicate while keeping order: a name matched twice must keep the
        # replacement that was actually written into the contents, otherwise
        # the mapping would point at a name that never appears and undo fails.
        for class_name in dict.fromkeys(re.findall(class_pattern, contents)):
            new_class_name = self._generate_random_class_name(len(class_name))
            class_mapping[class_name] = new_class_name
            contents = self._replace_word(contents, class_name, new_class_name)
        return contents, class_mapping

    def _undo_obscure_classes(self, contents, reverse_mapping):
        """Replace every obscured name in ``reverse_mapping`` by its original name."""
        for obscure_name in list(reverse_mapping.keys()):
            contents = self._replace_word(contents, obscure_name, reverse_mapping[obscure_name])
        return contents
7034
7035
7036---
7037File: /coding/schemas/swe.py
7038---
7039
7040from pydantic import BaseModel
7041
7042
class Edit(BaseModel):
    """A single-line change to a file."""

    file_name: str
    line_number: int
    line_content: str  # text before the change; "" when parse_diff records an added line
    new_line_content: str  # text after the change; "" when parse_diff records a removed line
7048
class Patch(BaseModel):
    """A list of single-line edits."""

    edits: list[Edit]
7051
7052
7053---
7054File: /coding/schemas/tracking.py
7055---
7056
7057from typing import List
7058from pydantic import BaseModel
7059
7060from .model import Model
7061
class TrackingInfo(BaseModel):
    """Record combining a logic dict with block, hotkey, uid and a score."""

    logic: dict
    block: int
    hotkey: str
    uid: int
    score: float = 0.0  # defaults to 0.0 when not supplied
7068
7069
7070
7071---
7072File: /coding/tasks/__init__.py
7073---
7074
7075# The MIT License (MIT)
7076# Copyright © 2024 Yuma Rao
7077# Copyright © 2023 Opentensor Foundation
7078# Copyright © 2024 Macrocosmos
7079# Copyright © 2024 Broke
7080
7081
7082# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
7083# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
7084# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
7085# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
7086
7087# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
7088# the Software.
7089
7090# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
7091# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
7092# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
7093# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
7094# DEALINGS IN THE SOFTWARE.
7095
7096import random
7097from typing import Callable
7098
7099from .task import Task
7100from .swe import SWEBenchTask
7101# from .debug import DebugTask
7102from .fim import FillInMiddleTask
7103from .repofile import RepoFileTask
7104from .repo import RepoCompletionTask
7105from .completion import CompletionTask
7106from .bigcodebench import BigCodeBenchTask
7107from .organic_convo import OrganicConvoTask
7108from .bigcodebench import BigCodeBenchTask
7109
# Maps each task class's ``name`` attribute to the class; create_task looks
# task names up here. BigCodeBenchTask and OrganicConvoTask are imported above
# but are not registered in this table.
TASKS = {
    RepoCompletionTask.name: RepoCompletionTask,
    FillInMiddleTask.name: FillInMiddleTask,
    CompletionTask.name: CompletionTask,
    RepoFileTask.name: RepoFileTask,
    # DebugTask.name: DebugTask,
    SWEBenchTask.name: SWEBenchTask,
}
7118
7119from coding.schemas import Context
7120from coding.helpers import Selector
7121from coding.protocol import StreamCodeSynapse
7122from coding.datasets import TheStackDataset, PipDataset, SWEBenchDataset, DatasetManager
7123
# Maps each task name to the list of dataset names it may draw its context
# from; create_task picks one entry of the list with its selector.
TASK_REGISTRY = {
    RepoCompletionTask.name: [TheStackDataset.name],
    FillInMiddleTask.name: [TheStackDataset.name],
    CompletionTask.name: [TheStackDataset.name],
    RepoFileTask.name: [TheStackDataset.name],
    # DebugTask.name: [PipDataset.name],
    SWEBenchTask.name: [SWEBenchDataset.name],
}
7132
7133
def create_task(
    llm,
    task_name: str,
    selector: Selector = random.choice,
    repl = None,
    code_scorer: Callable = None,
    dataset_manager: DatasetManager = None
) -> Task:
    """Create a task from the given task name and LLM pipeline.

    Args:
        llm (Pipeline): Pipeline to use for text generation
        task_name (str): Name of the task to create
        selector (Selector, optional): Selector function to choose a dataset. Defaults to random.choice.
        repl (optional): Forwarded unchanged to the task constructor.
        code_scorer (Callable, optional): Forwarded unchanged to the task constructor.
        dataset_manager (DatasetManager): Object whose ``datasets`` mapping is searched
            for the selected dataset name. Required despite the ``None`` default.

    Raises:
        ValueError: If task_name is not a valid alias for a task, or if the task is not a subclass of Task
        ValueError: If no datasets are available for the given task
        ValueError: If no dataset_manager is supplied
        ValueError: If the dataset for the given task is not found

    Returns:
        Task: Task instance
    """
    task = TASKS.get(task_name, None)
    if task is None or not issubclass(task, Task):
        raise ValueError(f"Task {task_name} not found")

    # A task missing from the registry yields None here; treat that like an
    # empty list instead of failing with a TypeError on len(None).
    dataset_choices = TASK_REGISTRY.get(task_name, None)
    if not dataset_choices:
        raise ValueError(f"No datasets available for task {task_name}")
    if dataset_manager is None:
        raise ValueError(f"A dataset_manager is required to create task {task_name}")

    dataset_name = selector(dataset_choices)
    dataset = dataset_manager.datasets.get(dataset_name, None)
    if dataset is None:
        raise ValueError(f"Dataset {dataset_name} not found")
    return task(llm=llm, context=dataset.next(**dict(task.dataset_options)), repl=repl, code_scorer=code_scorer)
7169
7170
def create_organic_task(
    llm,
    synapse: StreamCodeSynapse,
) -> Task:
    """Build an OrganicConvoTask from the messages and files carried by a synapse."""
    organic_context = Context(messages=synapse.messages, files=synapse.files)
    return OrganicConvoTask(llm=llm, context=organic_context)
7181 
7182
7183
7184---
7185File: /coding/tasks/bigcodebench.py
7186---
7187
7188import re
7189from pydantic import BaseModel
7190from typing import Callable, List, Dict
7191
7192from .task import Task
7193from coding.schemas import Context
7194
7195
class BigCodeInstruction(BaseModel):
    """Sections of a code prompt as extracted by ``bigcode_splitter``."""

    imports: list[str]  # text captured after each "import " in the prompt
    description: str
    parameters: dict  # parameter name -> {"type": ..., "description": ...}
    returns: str
    example: str
    requirements: list[str]
    signature: str  # "def name(args)" without the trailing colon
    code: str
    
    @property
    def prompt(self) -> str:
        """Render the instruction text built from signature, description, returns and imports."""
        # One "import <pkg>" line per captured import.
        imports = "\n".join(f"import {pkg}" for pkg in self.imports)
        return f"""
write a function {self.signature} to:
{self.description}

The function should output with:
{self.returns}

You should start with:
```
{imports}
{self.signature} ```
"""
7221
7222
def bigcode_splitter(prompt: str) -> BigCodeInstruction:
    """
    Split a prompt string into its sections and return them as a BigCodeInstruction.

    Every section that cannot be found falls back to an empty value ("", [] or
    {}), including the requirements list when the prompt has no
    "Requirements:" marker.
    """
    # Extracting each section using regex
    imports = re.findall(r"import (.+)", prompt)

    description_match = re.search(r'\"\"\"(.+?)Parameters:', prompt, re.DOTALL)
    description = description_match.group(1).strip() if description_match else ""

    parameters_match = re.search(r'Parameters:\s*(.+?)Requirements:', prompt, re.DOTALL)
    parameters_raw = parameters_match.group(1).strip() if parameters_match else ""
    parameters = parse_parameters(parameters_raw)

    # Text between the first "Requirements:" marker and the following
    # "Example:" marker; empty when the marker is missing instead of raising
    # an IndexError.
    requirements_parts = prompt.split("Requirements:")
    if len(requirements_parts) > 1:
        requirements_section = requirements_parts[1].split("Example:")[0]
    else:
        requirements_section = ""
    requirements = re.findall(r"- (\w+)", requirements_section)

    example_match = re.search(r'Example:\s+(.+?)Returns:', prompt, re.DOTALL)
    example = example_match.group(1).strip() if example_match else ""

    returns_match = re.search(r'Returns:\s*(.+?)\"\"\"', prompt, re.DOTALL)
    returns = returns_match.group(1).strip() if returns_match else ""

    signature_match = re.search(r'def (.+?):', prompt)
    signature = f'def {signature_match.group(1)}' if signature_match else ""

    # Extract the full code including the definition
    # TODO ensure to include the imports
    code_match = re.search(r'(def .+?:\s*.+)', prompt, re.DOTALL)
    code = code_match.group(1).strip() if code_match else ""

    return BigCodeInstruction(
        imports=imports,
        description=description,
        parameters=parameters,
        returns=returns,
        example=example,
        code=code,
        requirements=requirements,
        signature=signature
    )
7267
def parse_parameters(params_raw: str) -> Dict:
    """
    Turn a "Parameters" section into ``{name: {"type": ..., "description": ...}}``.

    Each line is stripped and must look like
    ``- corpus (List[str]): A list of text documents``; lines that do not
    match that shape are ignored.
    """
    entry_pattern = re.compile(r'- (\w+) \(([^)]+)\): (.+)')
    parsed = {}
    for raw_line in params_raw.splitlines():
        found = entry_pattern.match(raw_line.strip())
        if found is None:
            continue
        name, type_text, description = found.groups()
        parsed[name] = {"type": type_text, "description": description}
    return parsed
7282
7283
class BigCodeBenchTask(Task):
    """
    Task whose query is the instruction prompt derived from the context content.

    The reference is the unmodified context content.
    """

    name: str = "bigcodebench"
    desc: str = "Complete the code to match the given instructions"
    goal: str = "to complete the code to match the given instructions"
    # Annotated as a list of dicts (it was ``str``) to match the actual value
    # and the other task classes.
    reward_definition: List[dict] = [
        dict(name="codesim", weight=0.8),
        dict(name="speed", weight=0.2, ideal_time=4.5)
    ]
    penalty_definition: List = [
    ]
    cleaning_pipeline: List = [
    ]
    dataset_options: Dict = {}
    attachments = []
    messages = []
    files = []

    def __init__(self, llm: Callable | None = None, context: Context | None = None, **kwargs):
        """
        Build the query and reference from ``context``.

        ``llm`` is accepted but not used. ``context`` defaults to ``None`` in
        the signature but its attributes are read unconditionally, so a real
        Context must be supplied.
        """
        self.context = context
        instruction = bigcode_splitter(context.content)
        self.query = instruction.prompt
        self.reference = context.content
        self.topic = context.title
        self.subtopic = context.topic
        self.tags = context.tags
7309
7310
7311---
7312File: /coding/tasks/completion.py
7313---
7314
7315import ast
7316import random
7317from typing import Callable, List, Dict
7318
7319from .task import Task
7320from coding.schemas import Context
7321from coding.helpers.fim import insert_fim_hole
7322from coding.helpers.rewrite import rewrite_code
7323
def extract_random_function(code):
    """
    Pick a random top-level function in ``code`` and split it into signature and body.

    The signature is rebuilt as ``def name(arg1, arg2):`` from the positional
    argument names only. The body is the original source text from the first
    to the last statement of the function, located through the AST so that
    colons inside annotations or default values cannot cut it short.

    Parameters:
    code (str): The Python code as a string.

    Returns:
    tuple: ``(signature, body)``, or ``(None, None)`` when the code cannot be
    parsed or contains no top-level function.
    """
    # Re-seed from system entropy so the choice below is not tied to an earlier seed.
    random.seed(None)
    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError):
        return None, None

    functions = [node for node in tree.body if isinstance(node, ast.FunctionDef)]

    if not functions:
        return None, None

    selected_function = random.choice(functions)

    # Construct the function signature
    args_str = ", ".join(arg.arg for arg in selected_function.args.args)
    func_signature = f"def {selected_function.name}({args_str}):"

    # Extract the body from its statement positions. The throwaway node only
    # carries the source span from the first to the last body statement.
    first_stmt = selected_function.body[0]
    last_stmt = selected_function.body[-1]
    body_span = ast.Pass(
        lineno=first_stmt.lineno,
        col_offset=first_stmt.col_offset,
        end_lineno=last_stmt.end_lineno,
        end_col_offset=last_stmt.end_col_offset,
    )
    func_body = ast.get_source_segment(code, body_span)
    if func_body is None:
        return None, None

    return func_signature, func_body.strip()
7359
class CompletionTask(Task):
    """
    Code-completion task: the query is a function signature followed by a hole
    marker and the reference is that function's body. When no function can be
    extracted, a fill-in-the-middle hole is inserted into the content instead.
    """

    name: str = "completion"
    desc: str = "code completion"
    goal: str = "complete the code "
    reward_definition: List[dict] = [
        {"name": "codesim", "weight": 0.8},
        {"name": "speed", "weight": 0.2, "ideal_time": 1.5},
    ]
    penalty_definition: List = [
        {"name": "validcode", "weight": 1},
    ]
    cleaning_pipeline: List = []  # TODO remove markdown wrappings
    dataset_options: Dict = {}
    attachments = []
    messages = []
    files = []

    def __init__(self, llm: Callable, context: Context, **kwargs):
        """Rewrite the context content with ``llm`` and derive query and reference from it."""
        self.context = context
        context.content = rewrite_code(context.content, llm)

        signature, body = extract_random_function(context.content)  # TODO handle comments
        if signature is not None and body is not None:
            # we want them to complete that area, pretending its a hole
            self.query = signature + "<|fim_hole|>"
            self.reference = body
        else:
            self.query, self.reference = insert_fim_hole(context.content)

        self.topic = context.title
        self.subtopic = context.topic
        self.tags = context.tags
7394
7395
7396---
7397File: /coding/tasks/fim.py
7398---
7399
7400import random
7401from typing import Callable, List, Dict
7402
7403from .task import Task
7404from coding.schemas import Context
7405from coding.helpers.rewrite import rewrite_code
7406
def make_hole(text, chunk_size=5):
    """
    Cut ``chunk_size`` consecutive lines out of ``text`` at a random position.

    Returns a tuple ``(text_with_hole, removed_lines)`` where the removed lines
    are replaced by a single ``<|fim_hole|>`` line. When the text has no more
    than ``chunk_size`` lines, the whole text is the hole.
    """
    source_lines = text.splitlines()
    line_count = len(source_lines)

    # Too short to leave anything around the hole.
    if line_count <= chunk_size:
        return '<|fim_hole|>', text

    first = random.randint(0, line_count - chunk_size)
    stop = first + chunk_size

    removed = source_lines[first:stop]
    remaining = [*source_lines[:first], '<|fim_hole|>', *source_lines[stop:]]

    return '\n'.join(remaining), '\n'.join(removed)
7421
class FillInMiddleTask(Task):
    """
    Fill-in-the-middle task: the query is the content with a block of lines
    replaced by a hole marker and the reference is the removed block.
    """

    name: str = "fim"
    desc: str = "fill in the middle of the code"
    goal: str = "to fill in the blanks in the code"
    # Annotated as a list of dicts (it was ``str``) to match the actual value
    # and the other task classes.
    reward_definition: List[dict] = [
        dict(name="codesim", weight=0.8),
        dict(name="speed", weight=0.2, ideal_time=1.5)
    ]
    penalty_definition: List = [
        dict(name="validcode", weight=1)
    ]
    cleaning_pipeline: List = [
    ] # TODO remove markdown wrappings
    dataset_options: Dict = {}
    attachments = []
    messages = []
    files = []

    def __init__(self, llm: Callable, context: Context, **kwargs):
        """Rewrite the context content with ``llm`` and punch a random hole into it."""
        self.context = context
        context.content = rewrite_code(context.content, llm)
        fim_query, hole = make_hole(context.content)
        self.query = (
            fim_query
        )
        self.reference = hole

        self.topic = context.title
        self.subtopic = context.topic
        self.tags = context.tags
7452
7453
7454---
7455File: /coding/tasks/organic_convo.py
7456---
7457
7458import ast
7459import random
7460from typing import Callable, List, Dict
7461
7462from .task import Task
7463from coding.schemas import Context, ChatMessage, File
7464
def complete_conversation(llm: Callable, messages: List[ChatMessage], files: List[File], **kwargs):
    """
    Ask ``llm`` to answer a conversation, appending any files to the last message.

    Each file is added as ``#<path>`` followed by its content, with curly
    braces in the content doubled. Both the file objects and the last message
    are modified in place.

    Args:
        llm: Object with an ``invoke`` method whose result has a ``content`` attribute.
        messages: Conversation so far; must not be empty.
        files: Optional files to include as context; may be empty or ``None``.

    Returns:
        The ``content`` of the LLM response.

    Raises:
        ValueError: If ``messages`` is empty.
    """
    if not messages:
        raise ValueError("No messages provided")
    additional_context = ""
    if files:
        additional_context += "\n\nUse the following files as context for your response: \n"
        for file in files:
            # Only fill in a path when the attribute is really missing. The
            # former ``"path" not in file`` was a membership test on the
            # object, not an attribute check, and could blank a valid path.
            if getattr(file, "path", None) is None:
                file.path = ""
            # NOTE(review): braces are doubled presumably to survive prompt
            # templating further down - confirm; this also alters the caller's file.
            file.content = file.content.replace("}", "}}").replace("{", "{{")
            additional_context += f"#{file.path}\n{file.content}\n"
    messages[-1].content += additional_context
    response = llm.invoke([msg.dict() for msg in messages]).content
    return response
7479        
7480
class OrganicConvoTask(Task):
    """
    Task built from a live conversation: it has no query, and the reference is
    the LLM's answer to the context's messages and files.
    """

    name: str = "organic_convo"
    desc: str = "organic conversation task"
    goal: str = "respond correctly to the conversation"
    reward_definition: List[dict] = [
        # TODO using code similarity might not work for responses, but it should be fine? maybe do ROUGE or difflib
        {"name": "codesim", "weight": 0.8},
        {"name": "speed", "weight": 0.2, "ideal_time": 2.5},
    ]
    penalty_definition: List = []
    cleaning_pipeline: List = []  # TODO remove markdown wrappings
    dataset_options: Dict = {}
    attachments = []
    messages = []
    files = []

    def __init__(self, llm: Callable, context: Context, **kwargs):
        """Store the conversation from ``context`` and generate the reference answer."""
        self.context = context

        self.query = None
        self.messages = context.messages
        self.files = context.files
        reference_answer = complete_conversation(llm, self.messages, self.files)
        self.reference = reference_answer

        self.topic = context.title
        self.subtopic = context.topic
        self.tags = context.tags
7509
7510
7511---
7512File: /coding/tasks/repo.py
7513---
7514
7515import ast
7516import random
7517from typing import Callable, List, Dict
7518
7519from .task import Task
7520from coding.schemas import Context, File
7521from coding.helpers.fim import insert_fim_hole
7522from coding.helpers.rewrite import rewrite_code
7523
7524def delete_function_body_and_following(code: str) -> (str, str):
7525    """
7526    Takes in some code, randomly finds a function, deletes the body of that function and anything after it.
7527    
7528    Returns the function definition alongside the deleted body of the function.
7529    """
7530    random.seed(None)
7531    
7532    class FunctionBodyRemover(ast.NodeTransformer):
7533        def __init__(self, target_func_name):
7534            self.target_func_name = target_func_name
7535            self.body = None
7536            self.stop_processing = False
7537
7538        def visit_FunctionDef(self, node):
7539            if self.stop_processing:
7540                return None
7541            if node.name == self.target_func_name:
7542                self.body = ast.unparse(node.body) if node.body else ""
7543                node.body = []  # Remove the function body
7544                self.stop_processing = True  # Stop after we modify the targeted function
7545            return node
7546
7547    # Parse the code into an ASTt
7548    try:
7549        tree = ast.parse(code)
7550    except Exception as e:
7551        return None, None
7552
7553    # Randomly select a function to delete the body from
7554    functions = [node for node in tree.body if isinstance(node, ast.FunctionDef)]
7555    if not functions:
7556        return None, None
7557
7558    target_func = random.choice(functions)
7559
7560    # Remove the body of the target function
7561    remover = FunctionBodyRemover(target_func.name)
7562    remover.visit(tree)
7563
7564    # If the body was not captured, return an empty string
7565    if remover.body is None or remover.body.strip() == "":
7566        return None, None
7567
7568    # Find the function definition line in the original code
7569    func_def_start = code.find(f'def {target_func.name}')
7570    
7571    if func_def_start == -1:
7572        return None, None
7573
7574    # Extract just the function definition line
7575    func_def_end = code.find(":", func_def_start) + 1
7576    function_definition = code[func_def_start:func_def_end]
7577    
7578    if function_definition.strip() == "":
7579        return None, None
7580    
7581    if not function_definition or not remover.body:
7582        return None, None
7583        
7584    return function_definition, remover.body
7585
7586
class RepoCompletionTask(Task):
    """
    Repository-level completion: like a single-file completion, but the
    context's sibling documents are rewritten and attached as files.
    """

    name: str = "repo"
    desc: str = "repository level code completion"
    goal: str = "complete the code given the context of the rest of the repo"
    reward_definition: List[dict] = [
        {"name": "codesim", "weight": 0.8},
        {"name": "speed", "weight": 0.2, "ideal_time": 2.5},
    ]
    penalty_definition: List = [
        {"name": "validcode", "weight": 1},
    ]
    cleaning_pipeline: List = []  # TODO remove markdown wrappings
    dataset_options: Dict = {"include_sibling_docs": True}
    attachments = []
    messages = []
    files = []

    def __init__(self, llm: Callable, context: Context, **kwargs):
        """Rewrite the content with ``llm``, derive query/reference and attach sibling files."""
        self.context = context
        context.content = rewrite_code(context.content, llm)

        # Function-level splitting is only attempted for Python content.
        definition = body = None
        if context.topic == "Python":
            definition, body = delete_function_body_and_following(context.content)

        if definition is None or body is None:
            self.query, self.reference = insert_fim_hole(context.content)
        else:
            self.query = definition + "<|fim_hole|>"
            self.reference = body

        # rewrite every file
        siblings = context.extras['sibling_docs']
        for sibling in siblings:
            sibling.content = rewrite_code(sibling.content, llm)
        # Filter the info sent to the miners
        self.files = [File(path=sibling.title, content=sibling.content) for sibling in siblings]

        self.topic = context.title
        self.subtopic = context.topic
        self.tags = context.tags
7626
7627
7628---
7629File: /coding/tasks/repofile.py
7630---
7631
7632from typing import Callable, List, Dict
7633
7634from .task import Task
7635from coding.schemas import Context, File
7636from coding.helpers.rewrite import rewrite_code
class RepoFileTask(Task):
    """
    Repository-level file creation: the query asks for code matching an LLM
    summary of the content, the reference is the content itself, and the
    context's sibling documents are rewritten and attached as files.
    """

    name: str = "repofile"
    desc: str = "repository level file creation"
    goal: str = "write the python module that completes the code"
    reward_definition: List[dict] = [
        dict(name="codesim", weight=0.8), # TODO compare functions and objects to the closest as they might be out of order
        dict(name="speed", weight=0.2, ideal_time=3)
    ]
    penalty_definition: List = [
        dict(name="validcode", weight=1)
    ]
    cleaning_pipeline: List = [] # TODO remove markdown wrappings
    dataset_options: Dict = dict(include_sibling_docs=True)
    attachments = []
    messages = []
    files = []

    def __init__(self, llm: Callable, context: Context, **kwargs):
        """Summarize the content with ``llm`` to form the query and attach sibling files."""
        self.context = context

        summary = llm.invoke(f'Summarize what is happening in this code: {context.content}').content
        # The trailing space keeps the summary from being glued onto "to".
        self.query = "write code to " + summary

        # rewrite every file
        for file in context.extras['sibling_docs']:
            file.content = rewrite_code(file.content, llm)
        self.files = [File(path=cont.title, content=cont.content) for cont in context.extras['sibling_docs']] # Filter the info sent to the miners
        self.reference = context.content

        self.topic = context.title
        self.subtopic = context.topic
        self.tags = context.tags
7669
7670
7671---
7672File: /coding/tasks/swe.py
7673---
7674
7675import re
7676import bittensor as bt
7677from pydantic import BaseModel
7678from typing import Callable, List, Dict
7679from code_bert_score import BERTScorer
7680
7681from .task import Task
7682from coding.helpers.git import GitRepo
7683from coding.rewards.codesim import CodeSimModel
7684from coding.schemas import Context, Patch, Edit
7685
class PatchChunk(BaseModel):
    """A group of edits to one file, as assembled by ``chunk_patch``."""

    file_name: str
    start_index: int  # line number of the first edit in the chunk
    end_index: int  # line number of the last edit in the chunk
    content: str  # non-empty old line contents joined with newlines
    new_content: str  # presumably the joined new line contents - confirm in chunk_patch
7692
def parse_diff(diff_text: str, no_title=False) -> Patch:
    """
    Convert unified-diff text into a Patch of single-line edits.

    Added lines become edits numbered by the new file with an empty
    ``line_content``; removed lines become edits numbered by the old file with
    an empty ``new_line_content``. Line text is stored stripped of surrounding
    whitespace.

    Args:
        diff_text: The diff to parse.
        no_title: When true, a diff without a ``diff --git`` header is
            accepted and its edits get an empty file name.

    Returns:
        Patch: The collected edits in the order they appear.
    """
    diff_pattern = r"^diff --git a\/(.+?) b\/(.+?)$"
    # The ",count" part of a hunk range is optional in unified diffs.
    line_change_pattern = r"^@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@"
    edits = []

    current_file = None
    old_file_line_num = 0
    new_file_line_num = 0

    for line in diff_text.splitlines():
        diff_match = re.match(diff_pattern, line)
        if diff_match:
            current_file = diff_match.group(2)
            old_file_line_num = 0
            new_file_line_num = 0
            continue

        # Header-less diff: switch to the empty file name once and keep
        # parsing. Testing for None (not falsiness) prevents the empty name
        # from re-triggering this branch and skipping every line.
        if no_title and current_file is None:
            current_file = ""

        line_change_match = re.match(line_change_pattern, line)

        if line_change_match:
            old_file_line_num = int(line_change_match.group(1))
            new_file_line_num = int(line_change_match.group(2))
            continue

        if line.startswith("+") and not line.startswith("+++"):
            # Line added in new file
            edits.append(
                Edit(
                    file_name=current_file,
                    line_number=new_file_line_num,
                    line_content="",
                    new_line_content=line[1:].strip(),
                )
            )
            new_file_line_num += 1
        elif line.startswith("-") and not line.startswith("---"):
            # Line removed from old file
            edits.append(
                Edit(
                    file_name=current_file,
                    line_number=old_file_line_num,
                    line_content=line[1:].strip(),
                    new_line_content="",
                )
            )
            old_file_line_num += 1
        elif line.startswith(" "):
            # Context lines (lines present in both old and new files)
            old_file_line_num += 1
            new_file_line_num += 1

    return Patch(edits=edits)
7750
7751
7752# TODO ensure chunks within 2 lines of each other are grouped together
def _make_patch_chunk(file_name, edits) -> PatchChunk:
    """Collapse a non-empty run of edits from one file into a single PatchChunk."""
    return PatchChunk(
        file_name=file_name,
        start_index=edits[0].line_number,
        end_index=edits[-1].line_number,
        # Empty sides (pure additions / pure removals) are left out of the join.
        content="\n".join(e.line_content for e in edits if e.line_content),
        new_content="\n".join(e.new_line_content for e in edits if e.new_line_content),
    )


def chunk_patch(patch: Patch, max_gap: int = 1) -> List[PatchChunk]:
    """
    Group a patch's edits into chunks of nearby line numbers, per file.

    Args:
        patch: Patch whose ``edits`` are grouped.
        max_gap: Largest difference between successive edited line numbers
            that still keeps them in the same chunk. The default of 1 groups
            only identical or directly adjacent line numbers.

    Returns:
        Chunks ordered by first appearance of each file in the patch, and by
        ascending line number within a file.
    """
    # file name -> line number -> edits on that line (insertion-ordered)
    file_edits = {}
    for edit in patch.edits:
        file_edits.setdefault(edit.file_name, {}).setdefault(
            edit.line_number, []
        ).append(edit)

    chunks = []
    for file_name, line_edits in file_edits.items():
        current_chunk = []
        prev_line = None

        for line_num in sorted(line_edits):
            if prev_line is not None and line_num - prev_line > max_gap:
                # Gap too large: close the running chunk and start a new one.
                chunks.append(_make_patch_chunk(file_name, current_chunk))
                current_chunk = []
            # Copy into a fresh list rather than aliasing the grouping dict.
            current_chunk.extend(line_edits[line_num])
            prev_line = line_num

        # Add final chunk for this file
        if current_chunk:
            chunks.append(_make_patch_chunk(file_name, current_chunk))

    return chunks
7808
class SWEBenchTask(Task):
    """
    Task that scores a miner-supplied patch against the reference patch
    parsed from ``context.content``.
    """

    name: str = "swebench"
    desc: str = "given a github issue corrrectly solve it"
    goal: str = "return the valid patch"
    reward_definition: List[dict] = [
        dict(name="speed", weight=0.1, ideal_time=25),
        dict(name="self", weight=0.9),
    ]
    penalty_definition: List = []
    cleaning_pipeline: List = []  # TODO remove markdown wrappings
    dataset_options: Dict = {}
    attachments = []
    messages = []
    files = []

    def __init__(
        self, llm: Callable, context: Context, code_scorer: Callable = None, **kwargs
    ):
        """
        Build the task from a dataset context.

        Args:
            llm: Accepted for signature compatibility; not used in this body.
            context: Supplies the reference diff (``content``), the query
                (``topic``), the title, the tags, and the ``base_commit`` /
                ``pull_number`` entries of ``extras``.
            code_scorer: Object exposing ``similarity(a, b)``. A
                ``CodeSimModel`` is created when omitted.
            **kwargs: Accepted and ignored.
        """
        self.repo = GitRepo(context.title, context.extras["base_commit"])
        if code_scorer is None:
            self.code_scorer = CodeSimModel()
        else:
            self.code_scorer = code_scorer
        self.context = context
        self.patch: Patch = parse_diff(context.content)
        self.query = context.topic
        self.base_commit = context.extras["base_commit"]
        self.pull_number = context.extras["pull_number"]
        self.topic = context.title
        self.subtopic = context.topic
        self.tags = context.tags

    @staticmethod
    def _lines_by_file(edits) -> Dict:
        """Map each file name to the set of line numbers touched by `edits`."""
        lines = {}
        for edit in edits:
            lines.setdefault(edit.file_name, set()).add(edit.line_number)
        return lines

    def score(self, patch: Patch, token_count: int):
        """
        Score a miner patch against the reference patch.

        The result is the mean of (a) the fraction of reference line numbers
        the miner also edited and (b) the average chunk similarity, scaled
        down when the miner edited far more lines than the reference.

        Args:
            patch: The miner's parsed patch.
            token_count: Unused by this method.

        Returns:
            The score, or 0 when the reference patch has no edits or the
            over-editing penalty consumes the whole score.
        """
        bt.logging.info("Scoring patch")
        num_valid_lines = len(self.patch.edits)
        num_miner_lines = len(patch.edits)

        if num_valid_lines == 0:
            # Nothing to compare against; the ratios below would divide by zero.
            return 0

        # Checking to see if the miner changed more than what was needed.
        # Small reference patches tolerate a larger miner/reference ratio.
        ratio_limit = 3 if num_valid_lines > 20 else 7
        lines_over_percent = 1
        if num_miner_lines / num_valid_lines > ratio_limit:
            lines_over_percent -= (
                (num_miner_lines - (num_valid_lines * 2)) / num_valid_lines
            ) * 0.1

        if lines_over_percent <= 0:
            return 0

        # See which lines in the valid patch are in the miner patch and find
        # the percent: miner can edit extra lines but not less. Every file of
        # the reference patch counts, including files the miner never touched.
        valid_lines = self._lines_by_file(self.patch.edits)
        miner_lines = self._lines_by_file(patch.edits)
        total_valid_lines = sum(len(lines) for lines in valid_lines.values())
        lines_in_miner = sum(
            len(lines & miner_lines.get(file_name, set()))
            for file_name, lines in valid_lines.items()
        )
        percent_lines_in_miner = (
            lines_in_miner / total_valid_lines if total_valid_lines > 0 else 0
        )

        # Group edits into chunks by consecutive line numbers
        valid_chunks = chunk_patch(self.patch)
        miner_chunks = chunk_patch(patch)

        chunk_score = 0
        for valid_chunk in valid_chunks:
            # First miner chunk in the same file starting within 10 lines.
            counterpart = next(
                (
                    miner_chunk
                    for miner_chunk in miner_chunks
                    if miner_chunk.file_name == valid_chunk.file_name
                    and abs(miner_chunk.start_index - valid_chunk.start_index) <= 10
                ),
                None,
            )
            if counterpart is not None:
                chunk_score += self.code_scorer.similarity(
                    counterpart.new_content, valid_chunk.new_content
                )

        # Every reference chunk counts once, matched or not.
        total_chunk_score = len(valid_chunks)
        chunk_percent = chunk_score / total_chunk_score if total_chunk_score else 0
        score = (
            (5 * percent_lines_in_miner + 5 * chunk_percent) / 10
        ) * lines_over_percent

        return score
7921
7922
7923
7924---
7925File: /coding/tasks/task.py
7926---
7927
7928# The MIT License (MIT)
7929# Copyright © 2024 Yuma Rao
7930# Copyright © 2023 Opentensor Foundation
7931# Copyright © 2024 Macrocosmos
7932# Copyright © 2024 Broke
7933
7934
7935# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
7936# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
7937# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
7938# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
7939
7940# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
7941# the Software.
7942
7943# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
7944# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
7945# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
7946# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
7947# DEALINGS IN THE SOFTWARE.
7948
7949from abc import ABC
7950from dataclasses import dataclass, field
7951from typing import List, Union, Any, Dict, Callable
7952
7953from coding.schemas import Context, File
7954
7955
@dataclass
class Task(ABC):
    """Base record describing a task: its prompts, context, reference and reward setup."""

    name: str
    desc: str
    goal: str
    query: str
    topic: str
    subtopic: str
    tags: List[str]
    context: Context
    reward_definition: List[dict]
    timeout: int = 12
    # default_factory must be a zero-argument callable; passing a list
    # instance makes every default construction fail with TypeError.
    attachments: List[Any] = field(default_factory=list)
    files: List[File] = field(default_factory=list)
    penalty_definition: List[dict] = None
    dataset_options: Dict = field(default_factory=dict)
    reward_threshold: float = 0.0
    reference: Union[str, List[str], Dict] = ""
    # NOTE(review): the default is a one-element tuple although the field is
    # annotated `str` - looks like a stray trailing comma; confirm before changing.
    criteria: str = ("",)
    delimiter: str = ""
    complete: bool = False
    static_reference: bool = False
    static_query: bool = False
    reference_prompt: str = ""
    query_system_prompt: str = ""
    query_prompt: str = ""
    llm: Callable = None
    code_scorer: Callable = None
    extra_info: Dict = field(default_factory=dict)

    def __str__(self):
        """Return a one-line summary of the task's identifying fields."""
        return (
            f"{self.__class__.__name__}(name={self.name!r}, desc={self.desc!r}, "
            f"goal={self.goal!r}, query={self.query!r}, reference={self.reference!r}, "
            f"topic={self.topic!r}, subtopic={self.subtopic!r}, tags={self.tags!r})"
        )

    def __repr__(self):
        """Same text as `__str__`."""
        return str(self)

    def __state_dict__(self, full=False):
        """
        Build a plain dict describing the task.

        Args:
            full: When true, the context's own key/value pairs are merged in
                (they overwrite same-named keys).

        Returns:
            Dict with the task fields plus timing values; ``query_time`` and
            ``reference_time`` fall back to 0 when the attribute is absent.
        """
        state = {
            "task": self.name,
            "desc": self.desc,
            "goal": self.goal,
            "query": self.query,
            "query_time": getattr(self, "query_time", 0),
            "reference": self.reference,
            "reference_time": getattr(self, "reference_time", 0),
            "topic": self.topic,
            "subtopic": self.subtopic,
            "context_time": self.context.stats.get("fetch_time", 0.0),
        }
        if full:
            state.update(dict(self.context))

        return state
8009
8010
8011---
8012File: /coding/utils/__init__.py
8013---
8014
8015# from . import config
8016from . import misc
8017from . import uids
8018
8019
8020---
8021File: /coding/utils/config.py
8022---
8023
8024# The MIT License (MIT)
8025# Copyright © 2023 Yuma Rao
8026# Copyright © 2023 Opentensor Foundation
8027
8028# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
8029# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
8030# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8031# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8032
8033# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
8034# the Software.
8035
8036# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8037# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8038# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8039# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
8040# DEALINGS IN THE SOFTWARE.
8041
8042import os
8043import subprocess
8044import argparse
8045import bittensor as bt
8046from .logging import setup_events_logger
8047
8048from coding.tasks import TASKS
8049
def is_cuda_available():
    """
    Probe the local NVIDIA tooling and return the device string to use.

    Returns:
        "cuda" when ``nvidia-smi -L`` prints text containing "NVIDIA" or
        ``nvcc --version`` prints text containing "release"; "cpu" otherwise.
        A probe that fails for any reason is simply skipped.
    """
    # (command, stderr target, marker expected in the decoded output)
    probes = (
        (["nvidia-smi", "-L"], subprocess.STDOUT, "NVIDIA"),
        (["nvcc", "--version"], None, "release"),
    )
    for command, stderr_target, marker in probes:
        try:
            text = subprocess.check_output(command, stderr=stderr_target).decode("utf-8")
        except Exception:
            # Tool missing or failing: best effort, move on to the next probe.
            continue
        if marker in text:
            return "cuda"
    return "cpu"
8064
def check_config(cls, config: "bt.Config"):
    r"""
    Validate the config namespace and prepare the neuron's working directory.

    Stores the expanded directory path on ``config.neuron.full_path``, creates
    the directory when it does not exist, and - unless
    ``config.neuron.dont_save_events`` is set - installs the events logger and
    registers it as a primary bittensor logger.
    """
    bt.logging.check_config(config)

    path_parts = (
        config.logging.logging_dir,  # TODO: change from ~/.bittensor/miners to ~/.bittensor/neurons
        config.wallet.name,
        config.wallet.hotkey,
        config.netuid,
        config.neuron.name,
    )
    neuron_dir = os.path.expanduser("{}/{}/{}/netuid{}/{}".format(*path_parts))

    config.neuron.full_path = os.path.expanduser(neuron_dir)
    if not os.path.exists(config.neuron.full_path):
        os.makedirs(config.neuron.full_path, exist_ok=True)

    if config.neuron.dont_save_events:
        return

    # Add custom event logger for the events.
    events_logger = setup_events_logger(
        config.neuron.full_path, config.neuron.events_retention_size
    )
    bt.logging.register_primary_logger(events_logger.name)
8089
8090
def add_args(cls, parser):
    """
    Adds the arguments shared by miners and validators to the parser.

    Args:
        cls: Unused by this function.
        parser: The ``argparse`` parser to extend in place.
    """
    parser.add_argument("--netuid", type=int, help="Subnet netuid", default=45)
    parser.add_argument(
        "--neuron.device",
        type=str,
        help="Device to run on.",
        default=is_cuda_available(),
    )
    parser.add_argument(
        "--neuron.epoch_length",
        type=int,
        help="The default epoch length (how often we set weights, measured in 12 second blocks).",
        default=100,
    )

    parser.add_argument(
        "--mock",
        action="store_true",
        help="Mock neuron and all network components.",
        default=False,
    )

    parser.add_argument(
        "--neuron.events_retention_size",
        # Parsed as int: the value is handed to RotatingFileHandler(maxBytes=...),
        # which compares it numerically, so a str from the CLI would break rollover.
        type=int,
        help="Events retention size.",
        default=2 * 1024 * 1024 * 1024,  # 2 GB
    )

    parser.add_argument(
        "--neuron.dont_save_events",
        action="store_true",
        help="If set, we dont save events to a log file.",
        default=False,
    )

    parser.add_argument(
        "--neuron.tasks",
        type=str,
        nargs="+",
        help="The tasks to use for the validator.",
        default=list(TASKS.keys()),
    )

    parser.add_argument(
        "--neuron.task_weights",
        type=int,
        nargs="+",
        help="The weights for sampling of each task.",
        # NOTE(review): fixed six-entry default; presumably meant to line up
        # with the entries of TASKS - confirm when tasks are added or removed.
        default=[0, 0, 0, 0, 0, 1],
    )

    parser.add_argument(
        "--neuron.percent_organic_score",
        type=float,
        help="The percent of organic synapses to score",
        default=0.25,
    )
8152
def add_miner_args(cls, parser):
    """
    Register the miner-only command line options on `parser`.

    `cls` is not used. Options are added in the order listed below.
    """
    miner_options = (
        (
            "--miner.name",
            dict(type=str, help="The name of the miner to load", default="miner"),
        ),
        (
            "--neuron.model_id",
            dict(
                type=str,
                help="The model to use for the validator.",
                default="gpt-3.5-turbo-1106",
            ),
        ),
        (
            "--neuron.name",
            dict(
                type=str,
                help="Trials for this neuron go in neuron.root / (wallet_cold - wallet_hot) / neuron.name. ",
                default="miner",
            ),
        ),
        (
            # store_true with default=True: the value is True whether or not
            # the flag is passed.
            "--blacklist.force_validator_permit",
            dict(
                action="store_true",
                help="If set, we will force incoming requests to have a permit.",
                default=True,
            ),
        ),
        (
            "--blacklist.allow_non_registered",
            dict(
                action="store_true",
                help="If set, miners will accept queries from non registered entities. (Dangerous!)",
                default=False,
            ),
        ),
        (
            "--neuron.streaming_batch_size",
            dict(
                type=int,
                default=12,
                help="Batch size in tokens for streaming forward calls.",
            ),
        ),
    )
    for flag, options in miner_options:
        parser.add_argument(flag, **options)
8197
8198
8199
def _str_to_bool(value):
    """
    argparse ``type=`` converter for boolean options.

    Plain ``type=bool`` turns every non-empty string - including "False" -
    into True, so the text is interpreted explicitly instead.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    if text in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def add_validator_args(cls, parser):
    """
    Add validator specific arguments to the parser.

    Args:
        cls: Unused by this function.
        parser: The ``argparse`` parser to extend in place.
    """

    parser.add_argument(
        "--neuron.name",
        type=str,
        help="Trials for this neuron go in neuron.root / (wallet_cold - wallet_hot) / neuron.name. ",
        default="validator",
    )

    parser.add_argument(
        "--neuron.timeout",
        type=float,
        help="The timeout for each forward call in seconds.",
        default=10,
    )

    parser.add_argument(
        "--neuron.num_concurrent_forwards",
        type=int,
        help="The number of concurrent forwards running at any time.",
        default=1,  # TODO increase
    )

    parser.add_argument(
        "--neuron.sample_size",
        type=int,
        help="The number of miners to query in a single step.",
        default=50,  # TODO decrease?
    )

    parser.add_argument(
        "--neuron.disable_set_weights",
        action="store_true",
        help="Disables setting weights.",
        default=False,
    )

    parser.add_argument(
        "--neuron.moving_average_alpha",
        type=float,
        help="Moving average alpha parameter, how much to add of the new observation.",
        default=0.05,
    )

    parser.add_argument(
        "--wandb.project_name",
        type=str,
        help="The name of the project where you are sending the new run.",
        default="gen42",
    )

    parser.add_argument(
        "--wandb.on",
        # Explicit converter so that "--wandb.on False" really disables wandb.
        type=_str_to_bool,
        default=True,
        help="Enable wandb logging.",
    )

    parser.add_argument(
        "--wandb.entity",
        type=str,
        default="gen42",
        help="Wandb entity to log to.",
    )

    parser.add_argument(
        "--neuron.axon_off",
        "--axon_off",
        action="store_true",
        # Note: the validator needs to serve an Axon with their IP or they may
        #   be blacklisted by the firewall of serving peers on the network.
        help="Set this flag to not attempt to serve an Axon.",
        default=False,
    )

    parser.add_argument(
        "--neuron.vpermit_tao_limit",
        type=int,
        help="The maximum number of TAO allowed to query a validator with a vpermit.",
        default=4096,
    )

    parser.add_argument(
        "--neuron.model_id",
        type=str,
        help="The name of the LLM to be used for the validator.",
        default="Qwen/Qwen2.5-14B-Instruct-GPTQ-Int4",
    )

    parser.add_argument(
        "--neuron.model_url",
        type=str,
        help="The openai compatible model url to be used for the validator",
        default="http://localhost:8028/v1",
    )

    parser.add_argument(
        "--neuron.vllm_api_key",
        type=str,
        help="The API key for the openai compatible model url used by the validator",
        default="EMPTY",
    )

    parser.add_argument(
        "--neuron.forward_max_time",
        type=int,
        help="Max time to wait for a forward call to complete in seconds.",
        default=120,
    )

    parser.add_argument(
        "--neuron.finetune_gpu_id",
        type=int,
        help="The gpu to use for finetuning.",
        default=0,
    )

    parser.add_argument(
        "--neuron.finetune_test_size",
        type=int,
        help="The number of finetune tasks to generate and score with.",
        default=100,
    )
8324    
8325
8326
8327
def config(cls):
    """
    Build the bittensor config for a miner or validator.

    The wallet, subtensor, logging and axon arguments are registered first,
    followed by `cls.add_args` when a class is given. Trace and debug logging
    are switched on before the parser is turned into a config object.
    """
    arg_parser = argparse.ArgumentParser()
    for component in (bt.wallet, bt.subtensor, bt.logging, bt.axon):
        component.add_args(arg_parser)
    if cls is not None:
        cls.add_args(arg_parser)
    bt.trace()  # TODO add if statement for if they want this
    bt.debug()
    return bt.config(arg_parser)
8342
8343
8344
8345---
8346File: /coding/utils/exceptions.py
8347---
8348
8349# The MIT License (MIT)
8350# Copyright © 2024 Yuma Rao
8351# Copyright © 2023 Opentensor Foundation
8352# Copyright © 2024 Macrocosmos
8353# Copyright © 2024 Broke
8354
8355
8356# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
8357# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
8358# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8359# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8360
8361# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
8362# the Software.
8363
8364# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8365# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8366# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8367# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
8368# DEALINGS IN THE SOFTWARE.
8369
class MaxRetryError(Exception):
    """Signals that an operation ran out of retry attempts."""

    def __init__(self, message="Maximum number of retries exceeded"):
        # Hand the text to Exception first, then keep it on `.message` too.
        super().__init__(message)
        self.message = message
8376
8377
8378---
8379File: /coding/utils/logging.py
8380---
8381
8382# The MIT License (MIT)
8383# Copyright © 2024 Yuma Rao
8384# Copyright © 2023 Opentensor Foundation
8385# Copyright © 2024 Macrocosmos
8386# Copyright © 2024 Brokespace
8387
8388
8389# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
8390# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
8391# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8392# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8393
8394# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
8395# the Software.
8396
8397# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8398# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8399# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8400# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
8401# DEALINGS IN THE SOFTWARE.
8402
8403import os
8404import copy
8405import wandb
8406import coding
8407import logging
8408import bittensor as bt
8409from logging.handlers import RotatingFileHandler
8410
8411
# Numeric logging level that setup_events_logger registers under the name "EVENT".
EVENTS_LEVEL_NUM = 38
# Number of rotated events.log backups kept by the events file handler.
DEFAULT_LOG_BACKUP_COUNT = 10
8414
def setup_events_logger(full_path, events_retention_size):
    """
    Create the "event" logger that writes to ``<full_path>/events.log``.

    Registers the EVENT level name, attaches an ``event`` method to
    ``logging.Logger``, and adds a rotating file handler to the logger.

    Args:
        full_path: Directory that receives ``events.log``.
        events_retention_size: Passed as ``maxBytes`` to the rotating handler.

    Returns:
        The configured "event" logger.
    """

    def event(self, message, *args, **kws):
        if self.isEnabledFor(EVENTS_LEVEL_NUM):
            self._log(EVENTS_LEVEL_NUM, message, args, **kws)

    # Process-wide registrations: level name and the Logger.event method.
    logging.addLevelName(EVENTS_LEVEL_NUM, "EVENT")
    logging.Logger.event = event

    events_logger = logging.getLogger("event")
    events_logger.setLevel(EVENTS_LEVEL_NUM)

    log_file = os.path.join(full_path, "events.log")
    rotating_handler = RotatingFileHandler(
        log_file,
        maxBytes=events_retention_size,
        backupCount=DEFAULT_LOG_BACKUP_COUNT,
    )
    rotating_handler.setFormatter(
        logging.Formatter(
            "%(asctime)s | %(levelname)s | %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S",
        )
    )
    rotating_handler.setLevel(EVENTS_LEVEL_NUM)
    events_logger.addHandler(rotating_handler)

    return events_logger
8442
8443
def should_reinit_wandb(self):
    """
    Tell whether the wandb run should be rolled over at the current step.

    The result is truthy only when ``config.wandb.off`` is falsy, ``step`` is
    non-zero, and ``step`` is a multiple of ``config.wandb.run_step_length``.
    A falsy ``step`` is returned as-is.
    """
    # NOTE(review): relies on `wandb.off` and `wandb.run_step_length` being
    # present on the config - confirm they are registered somewhere.
    if self.config.wandb.off:
        return False
    step = self.step
    if not step:
        return step
    return step % self.config.wandb.run_step_length == 0
8451
8452
def init_wandb(self, reinit=False):
    """
    Start a wandb run and store it on ``self.wandb``.

    The run is named ``<uid>-<spec_version>``, where uid is the index of the
    wallet hotkey in the metagraph. Tags carry the hotkey address, the package
    version and spec version, the netuid, the active tasks, and markers for
    mock mode and disabled weight setting.

    Args:
        reinit: Forwarded to ``wandb.init``.
    """
    hotkey_address = self.wallet.hotkey.ss58_address
    uid = self.metagraph.hotkeys.index(hotkey_address)
    spec_version = str(coding.__spec_version__)

    tags = [
        self.wallet.hotkey.ss58_address,
        coding.__version__,
        str(coding.__spec_version__),
        f"netuid_{self.metagraph.netuid}",
    ]
    if self.config.mock:
        tags += ["mock"]
    tags.extend(self.active_tasks)
    if self.config.neuron.disable_set_weights:
        tags += ["disable_set_weights"]

    # Deep copies, so dropping full_path below leaves self.config untouched.
    wandb_config = {}
    for section in ("neuron", "reward", "netuid", "wandb"):
        wandb_config[section] = copy.deepcopy(self.config.get(section, None))
    wandb_config["neuron"].pop("full_path", None)

    # Any netuid other than 45 logs to a separate "<project>testnet" project.
    if self.config.netuid == 45:
        project = self.config.wandb.project_name
    else:
        project = self.config.wandb.project_name + "testnet"

    if self.config.wandb.offline:
        mode = "offline"
    else:
        mode = "online"

    self.wandb = wandb.init(
        anonymous="allow",
        reinit=reinit,
        project=project,
        entity=self.config.wandb.entity,
        config=wandb_config,
        mode=mode,
        dir=self.config.neuron.full_path,
        tags=tags,
        notes=self.config.wandb.notes,
        name=f"{uid}-{spec_version}",
    )
    bt.logging.success(f"Started a new wandb run <blue> {self.wandb.name} </blue>")
8490
8491
def reinit_wandb(self):
    """Finish the current wandb run, then start a replacement via `init_wandb`."""
    current_run = self.wandb
    current_run.finish()
    init_wandb(self, reinit=True)
8496
8497
def log_event(self, event):
    """
    Send `event` to wandb, starting a run first when none exists yet.

    Does nothing unless the netuid is 45 or 171 and ``config.wandb.on`` is
    truthy.
    """
    if self.config.netuid not in (45, 171) or not self.config.wandb.on:
        return

    # Lazily start a run when `self.wandb` is missing or falsy.
    if not getattr(self, "wandb", None):
        init_wandb(self)

    # Log the event to wandb.
    self.wandb.log(event)
8510
8511
8512---
8513File: /coding/utils/misc.py
8514---
8515
8516# The MIT License (MIT)
8517# Copyright © 2024 Yuma Rao
8518# Copyright © 2023 Opentensor Foundation
8519# Copyright © 2024 Macrocosmos
8520# Copyright © 2024 Broke
8521
8522
8523# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
8524# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
8525# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8526# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8527
8528# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
8529# the Software.
8530
8531# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8532# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8533# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8534# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
8535# DEALINGS IN THE SOFTWARE.
8536
8537import time
8538import math
8539import hashlib as rpccheckhealth
8540from math import floor
8541from typing import Callable, Any
8542from functools import lru_cache, update_wrapper
8543
8544
8545# LRU Cache with TTL
def ttl_cache(maxsize: int = 128, typed: bool = False, ttl: int = -1):
    """
    Build a decorator that memoises a function with an LRU cache whose
    entries expire over time.

    Each call is cached under its arguments plus a time-bucket number drawn
    from `_ttl_hash_gen`. When the bucket number changes, earlier entries no
    longer match and the function is evaluated again.

    Args:
        maxsize (int): Forwarded to `lru_cache`. Defaults to 128.
        typed (bool): Forwarded to `lru_cache`. Defaults to False.
        ttl (int): Length of a time bucket in seconds. Non-positive values
            are replaced by 65536 seconds. Defaults to -1.

    Returns:
        Callable: A decorator for the function to cache. Every function
        decorated through the same returned decorator shares one bucket
        generator.

    Example:
        @ttl_cache(ttl=10)
        def get_data(param):
            # Expensive data retrieval operation
            return data
    """
    effective_ttl = ttl if ttl > 0 else 65536
    bucket_ids = _ttl_hash_gen(effective_ttl)

    def decorate(func: Callable) -> Callable:
        def call_for_bucket(ttl_hash, *args, **kwargs):
            # ttl_hash only takes part in the cache key; it is not passed on.
            return func(*args, **kwargs)

        cached_call = lru_cache(maxsize, typed)(call_for_bucket)

        def wrapped(*args, **kwargs) -> Any:
            return cached_call(next(bucket_ids), *args, **kwargs)

        return update_wrapper(wrapped, func)

    return decorate
8589
8590
8591def _ttl_hash_gen(seconds: int):
8592    """
8593    Internal generator function used by the `ttl_cache` decorator to generate a new hash value at regular
8594    time intervals specified by `seconds`.
8595
8596    Args:
8597        seconds (int): The number of seconds after which a new hash value will be generated.
8598
8599    Yields:
8600        int: A hash value that represents the current time interval.
8601
8602    This generator is used to create time-based hash values that enable the `ttl_cache` to determine
8603    whether cached entries are still valid or if they have expired and should be recalculated.
8604    """
8605    start_time = time.time()
8606    while True:
8607        yield floor((time.time() - start_time) / seconds)
8608
8609
# The block number is refreshed at most once per 12-second TTL window.
@ttl_cache(maxsize=1, ttl=12)
def ttl_get_block(self) -> int:
    """
    Return the current block number, served through a single-entry TTL cache.

    The call is wrapped by `ttl_cache(maxsize=1, ttl=12)`, so repeated calls inside the same
    12-second window reuse the cached value instead of asking the subtensor again.

    Args:
        self: Object exposing a `subtensor` attribute with a `get_current_block()` method
              (the miner or validator instance). It is part of the cache key.

    Returns:
        int: The block number reported by `self.subtensor.get_current_block()`.

    Example:
        current_block = ttl_get_block(self)
    """
    subtensor = self.subtensor
    return subtensor.get_current_block()
8631
8632
8633
8634---
8635File: /coding/utils/shell.py
8636---
8637
8638import shlex
8639import subprocess
8640import bittensor as bt
8641from threading import Thread
8642
def execute_shell_command(command: str, model_name: str) -> subprocess.Popen:
    """
    Start a command as a subprocess and drain its output in background threads.

    The command string is tokenized with `shlex` and run without a shell. STDOUT is read and
    discarded; STDERR lines are forwarded to `bt.logging.debug` only when they mention
    `model_name` or contain "shard", with the model name replaced by "[REDACTED]".
    A daemon watchdog thread terminates the subprocess if it is still running after 5 hours.

    Args:
        command: Shell command as a string (can include \\ line continuations). Every backslash
                 is replaced by a space before tokenizing, so backslash escapes are not supported.
        model_name: Model identifier used to select STDERR lines and redacted from logged output.
                    An empty value disables both the model-name filter and the redaction.
    Returns:
        subprocess.Popen: The process handle for further interaction.
    Raises:
        Exception: Any error raised while tokenizing or launching the command is logged and re-raised.
    """
    timeout_seconds = 5 * 60 * 60

    # Replace \ newline with space, then drop the remaining backslashes.
    command = command.replace("\\\n", " ").replace("\\", " ")

    try:
        parts = shlex.split(command)  # Handles quoted strings correctly

        process = subprocess.Popen(
            parts, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )

        def stream_output(stream, stream_name):
            # `with` guarantees the pipe is closed even if reading or logging raises.
            with stream:
                for line in iter(stream.readline, ''):
                    if stream_name != "STDERR":
                        # STDOUT is drained so the child never blocks on a full pipe, but not logged.
                        continue
                    line = line.rstrip('\n')
                    # Guard against an empty model name: `"" in line` is always true and
                    # `line.replace("", ...)` would insert the marker between every character.
                    mentions_model = bool(model_name) and model_name in line
                    if mentions_model or "shard" in line:
                        redacted_line = (
                            line.replace(model_name, "[REDACTED]") if model_name else line
                        )
                        bt.logging.debug(f"{stream_name}: {redacted_line}")

        # Stream both stdout and stderr
        Thread(target=stream_output, args=(process.stdout, "STDOUT")).start()
        Thread(target=stream_output, args=(process.stderr, "STDERR")).start()

        def kill_after_timeout():
            # Waiting on the process (instead of sleeping) lets this thread finish as soon as
            # the process exits, while still enforcing the 5 hour limit.
            try:
                process.wait(timeout=timeout_seconds)
            except subprocess.TimeoutExpired:
                process.terminate()
                bt.logging.debug("Process terminated after 5 hour timeout")

        Thread(target=kill_after_timeout, daemon=True).start()

        return process
    except Exception as e:
        bt.logging.error(f"Error executing command: {command}. Exception: {e}")
        raise
8696
8697
8698---
8699File: /coding/utils/uids.py
8700---
8701
8702# The MIT License (MIT)
8703# Copyright © 2024 Yuma Rao
8704# Copyright © 2023 Opentensor Foundation
8705# Copyright © 2024 Macrocosmos
8706# Copyright © 2024 Brokespace
8707
8708
8709# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
8710# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
8711# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8712# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8713
8714# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
8715# the Software.
8716
8717# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8718# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8719# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8720# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
8721# DEALINGS IN THE SOFTWARE.
8722
8723import random
8724import numpy as np
8725import bittensor as bt
8726from typing import List
8727
8728
def check_uid_availability(
    metagraph: "bt.metagraph.Metagraph",
    uid: int,
    vpermit_tao_limit: int,
    coldkeys: set = None,
    ips: set = None,
) -> bool:
    """Decide whether a uid may be queried.

    A uid is rejected when its axon is not serving, when it holds a validator permit with stake above
    `vpermit_tao_limit`, or when its coldkey / ip appears in the optional exclusion sets.
    Args:
        metagraph (:obj: bt.metagraph.Metagraph): Metagraph object
        uid (int): uid to be checked
        vpermit_tao_limit (int): Maximum stake a validator-permitted uid may hold and still be available
        coldkeys (set): Coldkeys to exclude; ignored when empty or None
        ips (set): Ips to exclude; ignored when empty or None
    Returns:
        bool: True if uid is available, False otherwise
    """
    axon = metagraph.axons[uid]

    # An axon that is not serving can never be queried.
    if not axon.is_serving:
        return False

    # Validator-permitted uids with stake above the limit are rejected.
    has_permit = metagraph.validator_permit[uid]
    if has_permit and metagraph.S[uid] > vpermit_tao_limit:
        bt.logging.debug(
            f"uid: {uid} has vpermit and stake ({metagraph.S[uid]}) > {vpermit_tao_limit}"
        )
        return False

    # Caller-supplied exclusions by coldkey, then by ip.
    if coldkeys and axon.coldkey in coldkeys:
        return False

    return not (ips and axon.ip in ips)
8766
def get_random_uids(
    self, k: int, exclude: List[int] = None
) -> np.ndarray:
    """Returns k available random uids from the metagraph.

    Uids that are available and not in `exclude` are preferred. Excluded-but-available uids are
    only drawn, at random, when the preferred uids alone cannot supply `k` entries.
    Args:
        k (int): Number of uids to return.
        exclude (List[int]): List of uids to exclude from the random sampling.
    Returns:
        uids (np.ndarray): Randomly sampled available uids, as an integer array (also when empty).
    Notes:
        If `k` is larger than the number of available `uids`, set `k` to the number of available `uids`.
    """
    # A set gives O(1) membership tests; int() normalizes numpy/tensor scalars to plain ints.
    excluded = {int(uid) for uid in exclude} if exclude is not None else set()

    candidate_uids = []  # available and not excluded
    fallback_uids = []  # available but excluded by the caller

    for uid in range(self.metagraph.n.item()):
        if not check_uid_availability(
            self.metagraph, uid, self.config.neuron.vpermit_tao_limit
        ):
            continue
        if uid in excluded:
            fallback_uids.append(uid)
        else:
            candidate_uids.append(uid)

    # If k is larger than the number of available uids, set k to the number of available uids.
    k = min(k, len(candidate_uids) + len(fallback_uids))

    # Work on a copy so topping up the pool never mutates the candidate list.
    pool = list(candidate_uids)
    shortfall = k - len(pool)
    if shortfall > 0:
        pool += random.sample(fallback_uids, shortfall)

    # An explicit dtype keeps the result integer-typed even when nothing was sampled.
    return np.array(random.sample(pool, k), dtype=int)
8803
def get_miner_hotkeys(self) -> List[str]:
    """Return the axon hotkey of every uid that `check_uid_availability` accepts, in uid order."""
    return [
        self.metagraph.axons[uid].hotkey
        for uid in range(self.metagraph.n.item())
        if check_uid_availability(
            self.metagraph, uid, self.config.neuron.vpermit_tao_limit
        )
    ]
8810
def get_uid_from_hotkey(self, hotkey: str) -> int:
    """Return the lowest uid whose axon hotkey equals `hotkey`, or None when no axon matches."""
    total = self.metagraph.n.item()
    matches = (
        uid for uid in range(total) if self.metagraph.axons[uid].hotkey == hotkey
    )
    # The generator is lazy, so the scan stops at the first match.
    return next(matches, None)
8816
def get_hotkey_from_uid(self, uid: int) -> str:
    """Return the hotkey of the axon stored at index `uid` in the metagraph."""
    axon = self.metagraph.axons[uid]
    return axon.hotkey
8819
def get_miner_uids(self) -> List[int]:
    """Return, in ascending order, every uid that `check_uid_availability` accepts."""
    miner_uids = []
    for uid in range(self.metagraph.n.item()):
        if check_uid_availability(
            self.metagraph, uid, self.config.neuron.vpermit_tao_limit
        ):
            miner_uids.append(uid)
    return miner_uids
8822
8823
8824
8825---
8826File: /coding/validator/__init__.py
8827---
8828
8829from .forward import forward
8830from .reward import reward
8831
8832
8833
8834---
8835File: /coding/validator/forward.py
8836---
8837
8838from time import sleep
8839import bittensor as bt
8840from datetime import datetime, timezone, timedelta
8841
8842from coding.utils.logging import log_event
8843from coding.finetune import FinetunePipeline
8844from coding.protocol import StreamCodeSynapse
8845from coding.rewards.codesim import CodeSimModel
8846from coding.constants import COMPETITION_END_DATE, COMPETITION_ID
8847
8848
8849
async def forward(self, synapse: StreamCodeSynapse):
    """
    Run one validator step for the finetune competition.

    Each call:
      1. generates the finetune tasks if none exist yet and the current competition has no results,
      2. once 18:00 (UTC-5) on COMPETITION_END_DATE has passed, submits the finetune evaluation to
         `self.executor` (at most one pending evaluation at a time),
      3. stores the evaluation result under COMPETITION_ID when the submitted future is done,
      4. updates scores, logs the step, and pauses for 30 seconds.

    Args:
        self (:obj:`bittensor.neuron.Neuron`): The neuron object which contains all the necessary state for the validator.
        synapse (StreamCodeSynapse): Not used by this function.

    Raises:
        Exception: Whatever the evaluation raised, re-raised by `Future.result()` when collecting it.
    """
    # Imported locally because this module only imports the blocking `time.sleep` at file level.
    import asyncio

    bt.logging.info("🚀 Starting forward loop...")
    if not FinetunePipeline.tasks_exist(self.config) and COMPETITION_ID not in self.finetune_results:
        FinetunePipeline.generate_tasks(self.config)

    eastern = timezone(timedelta(hours=-5))  # EST is UTC-5 (fixed offset, no daylight saving)
    end_time = datetime.strptime(COMPETITION_END_DATE, "%Y-%m-%d").replace(hour=18, tzinfo=eastern)
    if datetime.now(eastern) > end_time:
        # The attribute check prevents submitting a second evaluation while one is still pending.
        if COMPETITION_ID not in self.finetune_results and not hasattr(self, 'finetune_eval_future'):
            finetune_pipeline = FinetunePipeline(
                config=self.config,
            )
            self.finetune_eval_future = self.executor.submit(finetune_pipeline.evaluate)
    # Check if evaluation is complete
    if hasattr(self, 'finetune_eval_future') and self.finetune_eval_future.done():
        self.finetune_results[COMPETITION_ID] = self.finetune_eval_future.result()
        delattr(self, 'finetune_eval_future')  # Remove the future after getting results

    self.update_scores()

    log_event(
        self,
        {
            "step": self.step,
            **(self.finetune_results[COMPETITION_ID].__state_dict__() if COMPETITION_ID in self.finetune_results else {}),
        },
    )
    # Yield to the event loop while pausing; a blocking time.sleep() here would stall every
    # other coroutine scheduled on the same loop for the full 30 seconds.
    await asyncio.sleep(30)
8887
8888
8889
8890---
8891File: /coding/validator/reward.py
8892---
8893
8894# The MIT License (MIT)
8895# Copyright © 2024 Yuma Rao
8896# Copyright © 2023 Opentensor Foundation
8897# Copyright © 2024 Macrocosmos
8898# Copyright © 2024 Broke
8899
8900
8901# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
8902# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
8903# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
8904# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
8905
8906# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
8907# the Software.
8908
8909# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
8910# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
8911# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
8912# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
8913# DEALINGS IN THE SOFTWARE.
8914
8915import numpy as np
8916from typing import List
8917
8918
def reward(query: int, response: int) -> float:
    """
    Reward the miner response to the dummy request. This method returns a reward
    value for the miner, which is used to update the miner's score.

    Args:
    - query (int): The value that was sent to the miner.
    - response (int): The value the miner returned.

    Returns:
    - float: 1.0 when the response equals twice the query, otherwise 0.0.
    """
    # Both branches return a float so arrays built from rewards always get a float dtype.
    return 1.0 if response == query * 2 else 0.0
8929
8930
def get_rewards(
    self,
    query: int,
    responses: List[float],
) -> np.ndarray:
    """
    Returns an array of rewards for the given query and responses.

    Args:
    - query (int): The query sent to the miner.
    - responses (List[float]): A list of responses from the miner. `None` entries are dropped
      before scoring, so the result can be shorter than `responses`.

    Returns:
    - np.ndarray: A float array with one reward per non-None response, in the original order.
    """
    # Remove any None values
    answered = [response for response in responses if response is not None]

    # Cast each response to int as the reward function expects an int type for response.
    scores = [reward(query, int(response)) for response in answered]

    # An explicit dtype keeps the array float-typed regardless of the individual reward values.
    return np.array(scores, dtype=float)
8954
8955
8956
8957---
8958File: /coding/__init__.py
8959---
8960
__version__ = "0.1.3"
version_split = __version__.split(".")
# Fold major / minor / patch into one integer: 1000 * major + 10 * minor + patch.
__spec_version__ = sum(
    weight * int(part)
    for weight, part in zip((1000, 10, 1), version_split)
)
8968
8969# Import all submodules.
8970from . import protocol
8971from . import base
8972# from . import validator
8973from . import api
8974
8975
8976
8977---
8978File: /coding/constants.py
8979---
8980
# Identifier of the competition these constants apply to.
COMPETITION_ID = 4

# Competition end date, formatted as YYYY-MM-DD.
COMPETITION_END_DATE = "2025-01-29"

# Module names on the allow-list. Each name appears exactly once.
ALLOWED_MODULES = [
    "langchain_community",
    "langchain_openai",
    "ast",
    "sentence_transformers",
    "networkx",
    "grep_ast",
    "tree_sitter",
    "tree_sitter_languages",
    "rapidfuzz",
    "llama_index",
    "pydantic",
    "numpy",
    "ruamel.yaml",
    "json",
    "libcst",
    "schemas.swe",
    "abc",
    "coding.finetune.llm.client",
    "coding.schemas.swe",
    "requests",
    "difflib",
    "logging",
    "time",
    "datetime",
    "random",
    "sklearn",
    "argparse",
    "uuid",
    "pandas",
    "tqdm",
    "collections",
    "platform",
    "re",
    "traceback",
    "typing",
    "resource",
    "concurrent",
    "io",
    "tokenize",
    "pathlib",
    "threading",
    "jsonlines",
    "tiktoken",
    "openai",
    "anthropic",
    "google",
    "langchain_anthropic",
    "langchain_google_genai",
    "langchain_core",
]

# Per-module allow-list of individual names, keyed by module name.
# NOTE(review): the standard `os` module has no `rm` attribute; confirm whether that entry is intended.
ALLOWED_IMPORTS = {
    'os': ['getenv', 'path', 'environ', 'makedirs', 'rm', 'walk', 'sep', 'remove'],
}

# Character limit; presumably applied to submitted code - TODO confirm against the consumer.
NUM_ALLOWED_CHARACTERS = 1_000_000
9044
9045
9046
9047---
9048File: /coding/dendrite.py
9049---
9050
9051# The MIT License (MIT)
9052# Copyright © 2024 Yuma Rao
9053# Copyright © 2023 Opentensor Foundation
9054# Copyright © 2024 Macrocosmos
9055# Copyright © 2024 Brokespace
9056
9057
9058# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
9059# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
9060# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
9061# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
9062
9063# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
9064# the Software.
9065
9066# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
9067# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
9068# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
9069# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
9070# DEALINGS IN THE SOFTWARE.
9071
9072import bittensor as bt
9073from typing import List, Any
9074
9075
class DendriteResponseEvent:
    """
    Snapshot of one round of dendrite responses, flattened into parallel lists.

    Attributes (one entry per response, in response order):
        uids: The uids that were queried, stored as passed in.
        completions: `synapse.completion` of each response.
        timings: `synapse.dendrite.process_time`, or `timeout` when that value is falsy.
        status_messages: `synapse.dendrite.status_message` of each response.
        status_codes: `synapse.dendrite.status_code` of each response (after the 204 relabelling).
        axons: The axons that were queried, stored as passed in.
        miner_hotkeys: The hotkey of each entry in `axons`.
        hotkeys: Always an empty list; kept only so existing attribute access keeps working.
    """

    def __init__(
        self, responses: List[bt.Synapse], uids, timeout: float, axons: List[Any]
    ):
        """
        Args:
            responses: Synapses returned by the dendrite. Each must expose `completion` and `dendrite`.
            uids: Queried uids; `__state_dict__` calls `.tolist()` on this value.
            timeout: Value recorded as the timing of any response without a process time.
            axons: Queried axons; each must expose `hotkey`.
        """
        self.uids = uids
        self.axons = axons
        self.hotkeys = []

        # A successful response with an empty completion is relabelled 204 on the synapse itself,
        # before the status codes are collected below.
        for synapse in responses:
            if len(synapse.completion) == 0 and synapse.dendrite.status_code == 200:
                synapse.dendrite.status_code = 204

        self.completions = [synapse.completion for synapse in responses]
        self.timings = [
            synapse.dendrite.process_time or timeout for synapse in responses
        ]
        self.status_messages = [
            synapse.dendrite.status_message for synapse in responses
        ]
        self.status_codes = [synapse.dendrite.status_code for synapse in responses]

        self.miner_hotkeys = [axon.hotkey for axon in axons]

    def __state_dict__(self):
        """Return the event as a dictionary of plain lists, e.g. for logging."""
        return {
            "uids": self.uids.tolist(),
            "completions": self.completions,
            "timings": self.timings,
            "status_messages": self.status_messages,
            "status_codes": self.status_codes,
            "miner_hotkeys": self.miner_hotkeys,
        }

    def __repr__(self):
        # Report the populated `miner_hotkeys` list, not the always-empty `hotkeys` attribute.
        return f"DendriteResponseEvent(uids={self.uids}, completions={self.completions}, timings={self.timings}, status_messages={self.status_messages}, status_codes={self.status_codes}, miner_hotkeys={self.miner_hotkeys})"
9129    
9130
9131
9132---
9133File: /coding/mock.py
9134---
9135
9136# The MIT License (MIT)
9137# Copyright © 2024 Yuma Rao
9138# Copyright © 2023 Opentensor Foundation
9139# Copyright © 2024 Macrocosmos
9140# Copyright © 2024 Brokespace
9141
9142
9143# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
9144# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
9145# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
9146# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
9147
9148# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
9149# the Software.
9150
9151# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
9152# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
9153# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
9154# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
9155# DEALINGS IN THE SOFTWARE.
9156
9157import time
9158
9159import asyncio
9160import random
9161import bittensor as bt
9162
9163from typing import List
9164
9165
class MockSubtensor(bt.MockSubtensor):
    """Mock subtensor that ensures a subnet exists and pre-registers a set of neurons on it."""

    def __init__(self, netuid, n=16, wallet=None, network="mock"):
        """
        Args:
            netuid: Subnet id to create (if missing) and register neurons on.
            n: Number of mock miner neurons to register.
            wallet: Optional wallet; when given, its hotkey/coldkey are registered before the miners.
            network: Network name forwarded to the base class.
        """
        super().__init__(network=network)

        if not self.subnet_exists(netuid):
            self.create_subnet(netuid)

        # Every registration uses the same subnet, balance and stake.
        shared = {"netuid": netuid, "balance": 100000, "stake": 100000}

        # The validator (ourself) is registered first, when a wallet is supplied.
        if wallet is not None:
            self.force_register_neuron(
                hotkey=wallet.hotkey.ss58_address,
                coldkey=wallet.coldkey.ss58_address,
                **shared,
            )

        # Then n mock miners, numbered from 1, all under one mock coldkey.
        for miner_index in range(1, n + 1):
            self.force_register_neuron(
                hotkey=f"miner-hotkey-{miner_index}",
                coldkey="mock-coldkey",
                **shared,
            )
9192
9193
class MockMetagraph(bt.metagraph):
    """Mock metagraph that syncs once on construction and points every axon at a fixed address."""

    def __init__(self, netuid=1, network="mock", subtensor=None):
        """
        Args:
            netuid: Subnet id forwarded to the base class.
            network: Network name forwarded to the base class.
            subtensor: Optional subtensor to attach and sync from.
        """
        # The base class is created without syncing; the sync happens explicitly below.
        super().__init__(netuid=netuid, network=network, sync=False)

        if subtensor is not None:
            self.subtensor = subtensor
        self.sync(subtensor=subtensor)

        # Overwrite the address of every axon with the same fixed ip and port.
        for mock_axon in self.axons:
            mock_axon.ip, mock_axon.port = "127.0.0.0", 8091

        bt.logging.info(f"Metagraph: {self}")
        bt.logging.info(f"Axons: {self.axons}")
9208
9209
class MockDendrite(bt.dendrite):
    """
    Stand-in for a real dendrite: instead of sending network requests it fabricates a response
    for every axon passed to `forward`.
    """

    def __init__(self, wallet):
        super().__init__(wallet)

    async def forward(
        self,
        axons: List[bt.axon],
        synapse: bt.Synapse = bt.Synapse(),
        timeout: float = 12,
        deserialize: bool = True,
        run_async: bool = True,
        streaming: bool = False,
    ):
        """
        Fabricate one response per axon and return them in axon order.

        Args:
            axons: Axons to "query".
            synapse: Template synapse; a copy is made for each axon.
            timeout: Threshold compared against a random draw in [0, 1) to choose between the
                     success and the timeout branch.
            deserialize: When True, return `deserialize()` of each response instead of the synapse.
            run_async: Accepted for signature compatibility; not used here.
            streaming: Must be False.

        Raises:
            NotImplementedError: If `streaming` is True.
        """
        if streaming:
            raise NotImplementedError("Streaming not implemented yet.")

        async def single_axon_response(i, axon):
            """Build the mocked response for one axon."""
            began = time.time()
            # Copy the template and attach the request metadata so it looks real.
            reply = self.preprocess_synapse_for_request(axon, synapse.copy(), timeout)

            simulated_latency = random.random()
            if not (simulated_latency < timeout):
                reply.dummy_output = 0
                reply.dendrite.status_code = 408
                reply.dendrite.status_message = "Timeout"
                # Note: this writes to the shared template synapse, not to the copy.
                synapse.dendrite.process_time = str(timeout)
            else:
                reply.dendrite.process_time = str(time.time() - began)
                # TODO (developer): replace with your own expected synapse data
                reply.dummy_output = reply.dummy_input * 2
                reply.dendrite.status_code = 200
                reply.dendrite.status_message = "OK"
                # Note: this writes to the shared template synapse, not to the copy.
                synapse.dendrite.process_time = str(simulated_latency)

            # Hand back either the deserialized payload or the synapse itself.
            return reply.deserialize() if deserialize else reply

        pending = (
            single_axon_response(i, target_axon)
            for i, target_axon in enumerate(axons)
        )
        return await asyncio.gather(*pending)

    def __str__(self) -> str:
        """
        Returns a string representation of the Dendrite object.

        Returns:
            str: The string representation in the format "MockDendrite(<user_wallet_address>)".
        """
        return f"MockDendrite({self.keypair.ss58_address})"
9279
9280
9281
9282---
9283File: /coding/protocol.py
9284---
9285
9286# The MIT License (MIT)
9287# Copyright © 2024 Yuma Rao
9288# Copyright © 2023 Opentensor Foundation
9289# Copyright © 2024 Macrocosmos
9290# Copyright © 2024 Broke
9291
9292
9293# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
9294# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
9295# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
9296# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
9297
9298# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
9299# the Software.
9300
9301# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
9302# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
9303# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
9304# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
9305# DEALINGS IN THE SOFTWARE.
9306
9307import json
9308import pydantic
9309import bittensor as bt
9310
9311from starlette.responses import StreamingResponse
9312from typing import List, AsyncIterator, Any, Optional
9313
9314from coding.schemas import ChatMessage, File
9315from coding.constants import COMPETITION_ID
9316
9317
class LogicSynapse(bt.Synapse):
    """
    Synapse used to request a miner's logic.

    Attributes:
        logic (dict): Mapping of filename to file contents. Defaults to an empty dict.
    """

    logic: dict = pydantic.Field(
        default={},
        title="logic",
        description="A dictionary where the key is a filename and the value is the file contents",
    )
9330
class HFModelSynapse(bt.Synapse):
    """
    Synapse used to ask a miner which Hugging Face model it has published.

    Attributes:
        model_name (Optional[str]): Name of the HF model the miner published. Defaults to "".
        competition_id (Optional[int]): Competition identifier. Defaults to COMPETITION_ID.

    The FIM prompt-token dictionary (prefix / middle / suffix) is currently disabled; see the
    commented-out `prompt_tokens` field below.
    """

    model_name: Optional[str] = ""
    competition_id: Optional[int] = COMPETITION_ID
    # prompt_tokens: Optional[dict] = None
9346
9347
9348class StreamCodeSynapse(bt.StreamingSynapse):
9349    """
9350    StreamPromptingSynapse is a specialized implementation of the `StreamingSynapse` tailored for prompting functionalities within
9351    the Bittensor network. This class is intended to interact with a streaming response that contains a sequence of tokens,
9352    which represent prompts or messages in a certain scenario.
9353
9354    As a developer, when using or extending the `StreamPromptingSynapse` class, you should be primarily focused on the structure
9355    and behavior of the prompts you are working with. The class has been designed to seamlessly handle the streaming,
9356    decoding, and accumulation of tokens that represent these prompts.
9357
9358    Attributes:
9359    - `roles` (List[str]): A list of roles involved in the prompting scenario. This could represent different entities
9360                           or agents involved in the conversation or use-case. They are immutable to ensure consistent
9361                           interaction throughout the lifetime of the object.
9362
9363    - `messages` (List[str]): These represent the actual prompts or messages in the prompting scenario. They are also
9364                              immutable to ensure consistent behavior during processing.
9365
9366    - `completion` (str): Stores the processed result of the streaming tokens. As tokens are streamed, decoded, and
9367                          processed, they are accumulated in the completion attribute. This represents the "final"
9368                          product or result of the streaming process.
9369    - `required_hash_fields` (List[str]): A list of fields that are required for the hash.
9370
9371    Methods:
9372    - `process_streaming_response`: This method asynchronously processes the incoming streaming response by decoding
9373                                    the tokens and accumulating them in the `completion` attribute.
9374
9375    - `deserialize`: Converts the `completion` attribute into its desired data format, in this case, a string.
9376
9377    - `extract_response_json`: Extracts relevant JSON data from the response, useful for gaining insights on the response's
9378                               metadata or for debugging purposes.
9379
9380    Note: While you can directly use the `StreamPromptingSynapse` class, it's designed to be extensible. Thus, you can create
9381    subclasses to further customize behavior for specific prompting scenarios or requirements.
9382    """
9383
9384
9385    
9386
9387    
9388    query: str = pydantic.Field(
9389        "",
9390        title="query",
9391        description="The query",
9392    )
9393    
9394    script: str = pydantic.Field(
9395        "",
9396        title="script",
9397        description="A python script that is being worked with",
9398    )
9399    
9400    messages: List[ChatMessage] = pydantic.Field(
9401        [],
9402        title="messages",
9403        description="A list of messages",
9404    )
9405     
9406    attachments: List[Any] = pydantic.Field(
9407        [],
9408        title="attachments",
9409        description="Attachments to be sent alongside the query",
9410    )
9411
9412    completion: str = pydantic.Field(
9413        "",
9414        title="Completion",
9415        description="Completion status of the current CodeSynapse object. This attribute is mutable and can be updated.",
9416    )
9417
9418    files: List[File] = pydantic.Field(
9419        [],
9420        title="Files",
9421        description="Files",
9422    )
9423    
9424    uid: int = pydantic.Field(
9425        9999,
9426        title="UID",
9427        description="Miner uid to send task to",
9428    )
9429    
9430    async def process_streaming_response(
9431        self, response: StreamingResponse
9432    ) -> AsyncIterator[str]:
9433        """
9434        `process_streaming_response` is an asynchronous method designed to process the incoming streaming response from the
9435        Bittensor network. It's the heart of the StreamPromptingSynapse class, ensuring that streaming tokens, which represent
9436        prompts or messages, are decoded and appropriately managed.
9437
9438        As the streaming response is consumed, the tokens are decoded from their 'utf-8' encoded format, split based on
9439        newline characters, and concatenated into the `completion` attribute. This accumulation of decoded tokens in the
9440        `completion` attribute allows for a continuous and coherent accumulation of the streaming content.
9441
9442        Args:
9443            response: The streaming response object containing the content chunks to be processed. Each chunk in this
9444                      response is expected to be a set of tokens that can be decoded and split into individual messages or prompts.
9445        """
9446        if self.completion is None:
9447            self.completion = ""
9448
9449        async for chunk in response.content.iter_any():
9450            tokens = chunk.decode("utf-8")
9451            
9452            try:
9453                data = json.loads(tokens)
9454                if isinstance(data, dict) or isinstance(data, list):
9455                    # Process the dictionary data as needed
9456                    self.completion = self.completion + json.dumps(data)
9457                    yield json.dumps(data)
9458                else:
9459                    self.completion = self.completion + tokens
9460                    yield tokens
9461            except json.JSONDecodeError:
9462                self.completion = self.completion + tokens
9463                yield tokens
9464        # if self.completion is None: #TODO remove this once confirm that above works
9465        #     self.completion = ""
9466
9467        # async for chunk in response.content.iter_any():
9468        #     tokens = chunk.decode("utf-8")
9469
9470        #     self.completion = self.completion + "".join([t for t in tokens if t])
9471        #     yield tokens
9472
9473    def deserialize(self) -> str:
9474        """
9475        Deserializes the response by returning the completion attribute.
9476
9477        Returns:
9478            str: The completion result.
9479        """
9480        return self.completion
9481
9482    def extract_response_json(self, response: StreamingResponse) -> dict:
9483        """
9484        `extract_response_json` is a method that performs the crucial task of extracting pertinent JSON data from the given
9485        response. The method is especially useful when you need a detailed insight into the streaming response's metadata
9486        or when debugging response-related issues.
9487
9488        Beyond just extracting the JSON data, the method also processes and structures the data for easier consumption
9489        and understanding. For instance, it extracts specific headers related to dendrite and axon, offering insights
9490        about the Bittensor network's internal processes. The method ultimately returns a dictionary with a structured
9491        view of the extracted data.
9492
9493        Args:
9494            response: The response object from which to extract the JSON data. This object typically includes headers and
9495                      content which can be used to glean insights about the response.
9496
9497        Returns:
9498            dict: A structured dictionary containing:
9499                - Basic response metadata such as name, timeout, total_size, and header_size.
9500                - Dendrite and Axon related information extracted from headers.
9501                - Roles and Messages pertaining to the current StreamPromptingSynapse instance.
9502                - The accumulated completion.
9503        """
9504        headers = {
9505            k.decode("utf-8"): v.decode("utf-8")
9506            for k, v in response.__dict__["_raw_headers"]
9507        }
9508
9509        def extract_info(prefix):
9510            return {
9511                key.split("_")[-1]: value
9512                for key, value in headers.items()
9513                if key.startswith(prefix)
9514            }
9515
9516        return {
9517            "name": headers.get("name", ""),
9518            "timeout": float(headers.get("timeout", 0)),
9519            "total_size": int(headers.get("total_size", 0)),
9520            "header_size": int(headers.get("header_size", 0)),
9521            "dendrite": extract_info("bt_header_dendrite"),
9522            "axon": extract_info("bt_header_axon"),
9523            "query": self.query, 
9524            "attachments": self.attachments,
9525            "completion": self.completion,
9526        }
9527
9528
9529
9530---
9531File: /contrib/CODE_REVIEW_DOCS.md
9532---
9533
9534# Code Review
9535### Conceptual Review
9536
9537A review can be a conceptual review, where the reviewer leaves a comment
9538 * `Concept (N)ACK`, meaning "I do (not) agree with the general goal of this pull
9539   request",
9540 * `Approach (N)ACK`, meaning `Concept ACK`, but "I do (not) agree with the
9541   approach of this change".
9542
9543A `NACK` needs to include a rationale why the change is not worthwhile.
9544NACKs without accompanying reasoning may be disregarded.
9545After conceptual agreement on the change, code review can be provided. A review
9546begins with `ACK BRANCH_COMMIT`, where `BRANCH_COMMIT` is the top of the PR
9547branch, followed by a description of how the reviewer did the review. The
9548following language is used within pull request comments:
9549
9550  - "I have tested the code", involving change-specific manual testing in
9551    addition to running the unit, functional, or fuzz tests, and in case it is
9552    not obvious how the manual testing was done, it should be described;
9553  - "I have not tested the code, but I have reviewed it and it looks
9554    OK, I agree it can be merged";
9555  - A "nit" refers to a trivial, often non-blocking issue.
9556
9557### Code Review
9558Project maintainers reserve the right to weigh the opinions of peer reviewers
9559using common sense judgement and may also weigh based on merit. Reviewers that
9560have demonstrated a deeper commitment and understanding of the project over time
9561or who have clear domain expertise may naturally have more weight, as one would
9562expect in all walks of life.
9563
9564Where a patch set affects consensus-critical code, the bar will be much
9565higher in terms of discussion and peer review requirements, keeping in mind that
9566mistakes could be very costly to the wider community. This includes refactoring
9567of consensus-critical code.
9568
9569Where a patch set proposes to change the Bittensor consensus, it must have been
9570discussed extensively on the discord server and other channels, be accompanied by a widely
9571discussed BIP and have a generally widely perceived technical consensus of being
9572a worthwhile change based on the judgement of the maintainers.
9573
9574### Finding Reviewers
9575
9576As most reviewers are themselves developers with their own projects, the review
9577process can be quite lengthy, and some amount of patience is required. If you find
9578that you've been waiting for a pull request to be given attention for several
9579months, there may be a number of reasons for this, some of which you can do something
9580about:
9581
9582  - It may be because of a feature freeze due to an upcoming release. During this time,
9583    only bug fixes are taken into consideration. If your pull request is a new feature,
9584    it will not be prioritized until after the release. Wait for the release.
9585  - It may be because the changes you are suggesting do not appeal to people. Rather than
9586    nits and critique, which require effort and means they care enough to spend time on your
9587    contribution, thundering silence is a good sign of widespread (mild) dislike of a given change
9588    (because people don't assume *others* won't actually like the proposal). Don't take
9589    that personally, though! Instead, take another critical look at what you are suggesting
9590    and see if it: changes too much, is too broad, doesn't adhere to the
9591    [developer notes](DEVELOPMENT_WORKFLOW.md), is dangerous or insecure, is messily written, etc.
9592    Identify and address any of the issues you find. Then ask e.g. on IRC if someone could give
9593    their opinion on the concept itself.
9594  - It may be because your code is too complex for all but a few people, and those people
9595    may not have realized your pull request even exists. A great way to find people who
9596    are qualified and care about the code you are touching is the
9597    [Git Blame feature](https://docs.github.com/en/github/managing-files-in-a-repository/managing-files-on-github/tracking-changes-in-a-file). Simply
9598    look up who last modified the code you are changing and see if you can find
9599    them and give them a nudge. Don't be incessant about the nudging, though.
9600  - Finally, if all else fails, ask on IRC or elsewhere for someone to give your pull request
9601    a look. If you think you've been waiting for an unreasonably long time (say,
9602    more than a month) for no particular reason (a few lines changed, etc.),
9603    this is totally fine. Try to return the favor when someone else is asking
9604    for feedback on their code, and the universe balances out.
9605  - Remember that the best thing you can do while waiting is give review to others!
9606
9607
9608---
9609File: /contrib/CONTRIBUTING.md
9610---
9611
9612# Contributing to Bittensor Subnet Development
9613
9614The following is a set of guidelines for contributing to the Bittensor ecosystem. These are **HIGHLY RECOMMENDED** guidelines, but not hard-and-fast rules. Use your best judgment, and feel free to propose changes to this document in a pull request.
9615
9616## Table Of Contents
96171. [How Can I Contribute?](#how-can-i-contribute)
9618   1. [Communication Channels](#communication-channels)
9619   1. [Code Contribution General Guidelines](#code-contribution-general-guidelines)
9620   1. [Pull Request Philosophy](#pull-request-philosophy)
9621   1. [Pull Request Process](#pull-request-process)
9622   1. [Addressing Feedback](#addressing-feedback)
9623   1. [Squashing Commits](#squashing-commits)
9624   1. [Refactoring](#refactoring)
9625   1. [Peer Review](#peer-review)
9626 1. [Suggesting Features](#suggesting-enhancements-and-features)
9627
9628
9629## How Can I Contribute?
9630TODO(developer): Define your desired contribution procedure.
9631
9632## Communication Channels
9633TODO(developer): Place your communication channels here
9634
9635> Please follow the Bittensor Subnet [style guide](./STYLE.md) regardless of your contribution type. 
9636
9637Here is a high-level summary:
9638- Code consistency is crucial; adhere to established programming language conventions.
9639- Use `black` to format your Python code; it ensures readability and consistency.
9640- Write concise Git commit messages; summarize changes in ~50 characters.
9641- Follow these six commit rules:
9642  - Atomic Commits: Focus on one task or fix per commit.
9643  - Subject and Body Separation: Use a blank line to separate the subject from the body.
9644  - Subject Line Length: Keep it under 50 characters for readability.
9645  - Imperative Mood: Write subject line as if giving a command or instruction.
9646  - Body Text Width: Wrap text manually at 72 characters.
9647  - Body Content: Explain what changed and why, not how.
9648- Make use of your commit messages to simplify project understanding and maintenance.
9649
9650> For clear examples of each of the commit rules, see the style guide's [rules](./STYLE.md#the-six-rules-of-a-great-commit) section.
9651
9652### Code Contribution General Guidelines
9653
9654> Review the Bittensor Subnet [style guide](./STYLE.md) and [development workflow](./DEVELOPMENT_WORKFLOW.md) before contributing. 
9655
9656
9657#### Pull Request Philosophy
9658
9659Patchsets and enhancements should always be focused. A pull request could add a feature, fix a bug, or refactor code, but it should not contain a mixture of these. Please also avoid 'super' pull requests which attempt to do too much, are overly large, or overly complex as this makes review difficult. 
9660
9661Specifically, pull requests must adhere to the following criteria:
9662- Contain fewer than 50 files. PRs with more than 50 files will be closed.
9663- If a PR introduces a new feature, it *must* include corresponding tests.
9664- Other PRs (bug fixes, refactoring, etc.) should ideally also have tests, as they provide proof of concept and prevent regression.
9665- Categorize your PR properly by using GitHub labels. This aids in the review process by informing reviewers about the type of change at a glance.
9666- Make sure your code includes adequate comments. These should explain why certain decisions were made and how your changes work.
9667- If your changes are extensive, consider breaking your PR into smaller, related PRs. This makes your contributions easier to understand and review.
9668- Be active in the discussion about your PR. Respond promptly to comments and questions to help reviewers understand your changes and speed up the acceptance process.
9669
9670Generally, all pull requests must:
9671
9672  - Have a clear use case, fix a demonstrable bug or serve the greater good of the project (e.g. refactoring for modularisation).
9673  - Be well peer-reviewed.
9674  - Follow code style guidelines.
9675  - Not break the existing test suite.
9676  - Where bugs are fixed, where possible, there should be unit tests demonstrating the bug and also proving the fix.
9677  - Change relevant comments and documentation when behaviour of code changes.
9678
9679#### Pull Request Process
9680
9681Please follow these steps to have your contribution considered by the maintainers:
9682
9683*Before* creating the PR:
96841. Read the [development workflow](./DEVELOPMENT_WORKFLOW.md) defined for this repository to understand our workflow.
96852. Ensure your PR meets the criteria stated in the 'Pull Request Philosophy' section.
96863. Include relevant tests for any fixed bugs or new features as stated in the [testing guide](./TESTING.md).
96874. Ensure your commit messages are clear and concise. Include the issue number if applicable.
96885. If you have multiple commits, rebase them into a single commit using `git rebase -i`.
96896. Explain what your changes do and why you think they should be merged in the PR description consistent with the [style guide](./STYLE.md).
9690
9691*After* creating the PR:
96921. Verify that all [status checks](https://help.github.com/articles/about-status-checks/) are passing after you submit your pull request. 
96932. Label your PR using GitHub's labeling feature. The labels help categorize the PR and streamline the review process.
96943. Document your code with comments that provide a clear understanding of your changes. Explain any non-obvious parts of your code or design decisions you've made.
96954. If your PR has extensive changes, consider splitting it into smaller, related PRs. This reduces the cognitive load on the reviewers and speeds up the review process.
9696
9697Please be responsive and participate in the discussion on your PR! This aids in clarifying any confusion or concerns and leads to quicker resolution and merging of your PR.
9698
9699> Note: If your changes are not ready for merge but you want feedback, create a draft pull request.
9700
9701Following these criteria will aid in quicker review and potential merging of your PR.
9702While the prerequisites above must be satisfied prior to having your pull request reviewed, the reviewer(s) may ask you to complete additional design work, tests, or other changes before your pull request can be ultimately accepted.
9703
9704When you are ready to submit your changes, create a pull request:
9705
9706> **Always** follow the [style guide](./STYLE.md) and [development workflow](./DEVELOPMENT_WORKFLOW.md) before submitting pull requests.
9707
9708After you submit a pull request, it will be reviewed by the maintainers. They may ask you to make changes. Please respond to any comments and push your changes as a new commit.
9709
9710> Note: Be sure to merge the latest from "upstream" before making a pull request:
9711
9712```bash
9713git remote add upstream https://github.com/opentensor/bittensor.git # TODO(developer): replace with your repo URL
9714git fetch upstream
9715git merge upstream/<your-branch-name>
9716git push origin <your-branch-name>
9717```
9718
9719#### Addressing Feedback
9720
9721After submitting your pull request, expect comments and reviews from other contributors. You can add more commits to your pull request by committing them locally and pushing to your fork.
9722
9723You are expected to reply to any review comments before your pull request is merged. You may update the code or reject the feedback if you do not agree with it, but you should express so in a reply. If there is outstanding feedback and you are not actively working on it, your pull request may be closed.
9724
9725#### Squashing Commits
9726
9727If your pull request contains fixup commits (commits that change the same line of code repeatedly) or too fine-grained commits, you may be asked to [squash](https://git-scm.com/docs/git-rebase#_interactive_mode) your commits before it will be reviewed. The basic squashing workflow is shown below.
9728
9729    git checkout your_branch_name
9730    git rebase -i HEAD~n
9731    # n is normally the number of commits in the pull request.
9732    # Set commits (except the one in the first line) from 'pick' to 'squash', save and quit.
9733    # On the next screen, edit/refine commit messages.
9734    # Save and quit.
9735    git push -f # (force push to GitHub)
9736
9737Please update the resulting commit message, if needed. It should read as a coherent message. In most cases, this means not just listing the interim commits.
9738
9739If your change contains a merge commit, the above workflow may not work and you will need to remove the merge commit first. See the next section for details on how to rebase.
9740
9741Please refrain from creating several pull requests for the same change. Use the pull request that is already open (or was created earlier) to amend changes. This preserves the discussion and review that happened earlier for the respective change set.
9742
9743The length of time required for peer review is unpredictable and will vary from pull request to pull request.
9744
9745#### Refactoring
9746
9747Refactoring is a necessary part of any software project's evolution. The following guidelines cover refactoring pull requests for the project.
9748
9749There are three categories of refactoring: code-only moves, code style fixes, and code refactoring. In general, refactoring pull requests should not mix these three kinds of activities in order to make refactoring pull requests easy to review and uncontroversial. In all cases, refactoring PRs must not change the behaviour of code within the pull request (bugs must be preserved as is).
9750
9751Project maintainers aim for a quick turnaround on refactoring pull requests, so where possible keep them short, uncomplex and easy to verify.
9752
9753Pull requests that refactor the code should not be made by new contributors. It requires a certain level of experience to know where the code belongs to and to understand the full ramification (including rebase effort of open pull requests). Trivial pull requests or pull requests that refactor the code with no clear benefits may be immediately closed by the maintainers to reduce unnecessary workload on reviewing.
9754
9755#### Peer Review
9756
9757Anyone may participate in peer review which is expressed by comments in the pull request. Typically reviewers will review the code for obvious errors, as well as test out the patch set and opine on the technical merits of the patch. Project maintainers take into account the peer review when determining if there is consensus to merge a pull request (remember that discussions may have taken place elsewhere, not just on GitHub). The following language is used within pull-request comments:
9758
9759- ACK means "I have tested the code and I agree it should be merged";
9760- NACK means "I disagree this should be merged", and must be accompanied by sound technical justification. NACKs without accompanying reasoning may be disregarded;
9761- utACK means "I have not tested the code, but I have reviewed it and it looks OK, I agree it can be merged";
9762- Concept ACK means "I agree in the general principle of this pull request";
9763- Nit refers to trivial, often non-blocking issues.
9764
9765Reviewers should include the commit(s) they have reviewed in their comments. This can be done by copying the commit SHA1 hash.
9766
9767A pull request that changes consensus-critical code is considerably more involved than a pull request that adds a feature to the wallet, for example. Such patches must be reviewed and thoroughly tested by several reviewers who are knowledgeable about the changed subsystems. Where new features are proposed, it is helpful for reviewers to try out the patch set on a test network and indicate that they have done so in their review. Project maintainers will take this into consideration when merging changes.
9768
9769For a more detailed description of the review process, see the [Code Review Guidelines](CODE_REVIEW_DOCS.md).
9770
9771> **Note:** If you find a **Closed** issue that seems like it is the same thing that you're experiencing, open a new issue and include a link to the original issue in the body of your new one.
9772
9773#### How Do I Submit A (Good) Bug Report?
9774
9775Please track bugs as GitHub issues.
9776
9777Explain the problem and include additional details to help maintainers reproduce the problem:
9778
9779* **Use a clear and descriptive title** for the issue to identify the problem.
9780* **Describe the exact steps which reproduce the problem** in as many details as possible. For example, start by explaining how you started the application, e.g. which command exactly you used in the terminal, or how you started Bittensor otherwise. When listing steps, **don't just say what you did, but explain how you did it**. For example, if you ran with a set of custom configs, explain if you used a config file or command line arguments. 
9781* **Provide specific examples to demonstrate the steps**. Include links to files or GitHub projects, or copy/pasteable snippets, which you use in those examples. If you're providing snippets in the issue, use [Markdown code blocks](https://help.github.com/articles/markdown-basics/#multiple-lines).
9782* **Describe the behavior you observed after following the steps** and point out what exactly is the problem with that behavior.
9783* **Explain which behavior you expected to see instead and why.**
9784* **Include screenshots and animated GIFs** which show you following the described steps and clearly demonstrate the problem. You can use [this tool](https://www.cockos.com/licecap/) to record GIFs on macOS and Windows, and [this tool](https://github.com/colinkeenan/silentcast) or [this tool](https://github.com/GNOME/byzanz) on Linux.
9785* **If you're reporting that Bittensor crashed**, include a crash report with a stack trace from the operating system. On macOS, the crash report will be available in `Console.app` under "Diagnostic and usage information" > "User diagnostic reports". Include the crash report in the issue in a [code block](https://help.github.com/articles/markdown-basics/#multiple-lines), a [file attachment](https://help.github.com/articles/file-attachments-on-issues-and-pull-requests/), or put it in a [gist](https://gist.github.com/) and provide link to that gist.
9786* **If the problem is related to performance or memory**, include a CPU profile capture with your report, if you're using a GPU then include a GPU profile capture as well. Look into the [PyTorch Profiler](https://pytorch.org/tutorials/recipes/recipes/profiler_recipe.html) to look at memory usage of your model.
9787* **If the problem wasn't triggered by a specific action**, describe what you were doing before the problem happened and share more information using the guidelines below.
9788
9789Provide more context by answering these questions:
9790
9791* **Did the problem start happening recently** (e.g. after updating to a new version) or was this always a problem?
9792* If the problem started happening recently, **can you reproduce the problem in an older version of Bittensor?** 
9793* **Can you reliably reproduce the issue?** If not, provide details about how often the problem happens and under which conditions it normally happens.
9794
9795Include details about your configuration and environment:
9796
9797* **Which version of Bittensor Subnet are you using?**
9798* **What commit hash are you on?** You can get the exact commit hash by checking `git log` and pasting the full commit hash.
9799* **What's the name and version of the OS you're using**?
9800* **Are you running Bittensor Subnet in a virtual machine?** If so, which VM software are you using and which operating systems and versions are used for the host and the guest?
9801* **Are you running Bittensor Subnet in a dockerized container?** If so, have you made sure that your docker container contains your latest changes and is up to date with Master branch?
9802
9803### Suggesting Enhancements and Features
9804
9805This section guides you through submitting an enhancement suggestion, including completely new features and minor improvements to existing functionality. Following these guidelines helps maintainers and the community understand your suggestion :pencil: and find related suggestions :mag_right:.
9806
9807When you are creating an enhancement suggestion, please [include as many details as possible](#how-do-i-submit-a-good-enhancement-suggestion). Fill in [the template](https://bit.ly/atom-behavior-pr), including the steps that you imagine you would take if the feature you're requesting existed.
9808
9809#### Before Submitting An Enhancement Suggestion
9810
9811* **Check the [debugging guide](./DEBUGGING.md)** for tips — you might discover that the enhancement is already available. Most importantly, check if you're using the latest version of the project first.
9812
9813#### How Do I Submit A (Good) Enhancement Suggestion?
9814
9815* **Use a clear and descriptive title** for the issue to identify the suggestion.
9816* **Provide a step-by-step description of the suggested enhancement** in as many details as possible.
9817* **Provide specific examples to demonstrate the steps**. Include copy/pasteable snippets which you use in those examples, as [Markdown code blocks](https://help.github.com/articles/markdown-basics/#multiple-lines).
9818* **Describe the current behavior** and **explain which behavior you expected to see instead** and why.
9819* **Include screenshots and animated GIFs** which help you demonstrate the steps or point out the part of the project which the suggestion is related to. You can use [this tool](https://www.cockos.com/licecap/) to record GIFs on macOS and Windows, and [this tool](https://github.com/colinkeenan/silentcast) or [this tool](https://github.com/GNOME/byzanz) on Linux.
9820* **Explain why this enhancement would be useful** to most users.
9821* **List some other text editors or applications where this enhancement exists.**
9822* **Specify the name and version of the OS you're using.**
9823
9824Thank you for considering contributing to Bittensor! Any help is greatly appreciated along this journey to incentivize open and permissionless intelligence.
9825
9826
9827
9828---
9829File: /contrib/DEVELOPMENT_WORKFLOW.md
9830---
9831
9832# Bittensor Subnet Development Workflow
9833
9834This is a highly advisable workflow to follow to keep your subtensor project organized and foster ease of contribution.
9835
9836## Table of contents
9837
9838- [Bittensor Subnet Development Workflow](#bittensor-subnet-development-workflow)
9839  - [Main Branches](#main-branches)
9840  - [Development Model](#development-model)
9841      - [Feature Branches](#feature-branches)
9842      - [Release Branches](#release-branches)
9843      - [Hotfix Branches](#hotfix-branches)
9844  - [Git Operations](#git-operations)
9845      - [Creating a Feature Branch](#creating-a-feature-branch)
9846      - [Merging Feature Branch into Staging](#merging-feature-branch-into-staging)
9847      - [Creating a Release Branch](#creating-a-release-branch)
9848      - [Finishing a Release Branch](#finishing-a-release-branch)
9849      - [Creating a Hotfix Branch](#creating-a-hotfix-branch)
9850      - [Finishing a Hotfix Branch](#finishing-a-hotfix-branch)
9851  - [Continuous Integration (CI) and Continuous Deployment (CD)](#continuous-integration-ci-and-continuous-deployment-cd)
9852  - [Versioning and Release Notes](#versioning-and-release-notes)
9853  - [Pending Tasks](#pending-tasks)
9854
9855## Main Branches
9856
9857Bittensor's codebase consists of two main branches: **main** and **staging**.
9858
9859**main**
9860- This is Bittensor's live production branch, which should only be updated by the core development team. This branch is protected, so refrain from pushing or merging into it unless authorized.
9861
9862**staging**
9863- This branch is continuously updated and is where you propose and merge changes. It's essentially Bittensor's active development branch.
9864
9865## Development Model
9866
9867### Feature Branches
9868
9869- Branch off from: `staging`
9870- Merge back into: `staging`
9871- Naming convention: `feature/<ticket>/<descriptive-sentence>`
9872
9873Feature branches are used to develop new features for upcoming or future releases. They exist as long as the feature is in development, but will eventually be merged into `staging` or discarded. Always delete your feature branch after merging to avoid unnecessary clutter.
9874
9875### Release Branches
9876
9877- Branch off from: `staging`
9878- Merge back into: `staging` and then `main`
9879- Naming convention: `release/<version>/<descriptive-message>/<creator's-name>`
9880
9881Release branches support the preparation of a new production release, allowing for minor bug fixes and preparation of metadata (version number, configuration, etc). All new features should be merged into `staging` and wait for the next big release.
9882
9883### Hotfix Branches
9884
9885General workflow:
9886
9887- Branch off from: `main` or `staging`
9888- Merge back into: `staging` then `main`
9889- Naming convention: `hotfix/<version>/<descriptive-message>/<creator's-name>` 
9890
9891Hotfix branches are meant for quick fixes in the production environment. When a critical bug in a production version must be resolved immediately, a hotfix branch is created.
9892
9893## Git Operations
9894
9895#### Create a feature branch
9896
98971. Branch from the **staging** branch.
9898    1. Command: `git checkout -b feature/my-feature staging`
9899
9900> Rebase frequently with the updated staging branch so you do not face big conflicts before submitting your pull request. Remember, syncing your changes with other developers could also help you avoid big conflicts.
9901
9902#### Merge feature branch into staging
9903
9904In other words, integrate your changes into a branch that will be tested and prepared for release.
9905
99061. Switch branch to staging: `git checkout staging`
99072. Merging feature branch into staging: `git merge --no-ff feature/my-feature`
99083. Pushing changes to staging: `git push origin staging`
99094. Delete feature branch: `git branch -d feature/my-feature` (alternatively, this can be navigated on the GitHub web UI)
9910
9911This operation is done by GitHub when merging a PR.
9912
9913So, what you have to keep in mind is:
9914- Open the PR against the `staging` branch.
9915- After merging a PR you should delete your feature branch. This will be strictly enforced.
9916
9917#### Creating a release branch
9918
99191. Create branch from staging: `git checkout -b release/3.4.0/descriptive-message/creator's_name staging`
99202. Updating version with major or minor: `./scripts/update_version.sh major|minor`
99213. Commit file changes with new version: `git commit -a -m "Updated version to 3.4.0"`
9922
9923
9924#### Finishing a Release Branch
9925
9926This involves releasing stable code and generating a new version for bittensor.
9927
99281. Switch branch to main: `git checkout main`
99292. Merge release branch into main: `git merge --no-ff release/3.4.0/optional-descriptive-message`
99303. Tag changeset: `git tag -a v3.4.0 -m "Releasing v3.4.0: some comment about it"`
99314. Push changes to main: `git push origin main`
99325. Push tags to origin: `git push origin --tags`
9933
9934To keep the changes made in the __release__ branch, we need to merge those back into `staging`:
9935
9936- Switch branch to staging: `git checkout staging`.
9937- Merging release branch into staging: `git merge --no-ff release/3.4.0/optional-descriptive-message`
9938
9939This step may well lead to a merge conflict (probably even, since we have changed the version number). If so, fix it and commit.
9940
9941
9942#### Creating a hotfix branch
99431. Create branch from main: `git checkout -b hotfix/3.3.4/descriptive-message/creator's-name main`
99442. Update patch version: `./scripts/update_version.sh patch`
99453. Commit file changes with new version: `git commit -a -m "Updated version to 3.3.4"`
99464. Fix the bug and commit the fix: `git commit -m "Fixed critical production issue X"`
9947
9948#### Finishing a Hotfix Branch
9949
9950Finishing a hotfix branch involves merging the bugfix into both `main` and `staging`.
9951
99521. Switch branch to main: `git checkout main`
99532. Merge hotfix into main: `git merge --no-ff hotfix/3.3.4/optional-descriptive-message`
99543. Tag new version: `git tag -a v3.3.4 -m "Releasing v3.3.4: descriptive comment about the hotfix"`
99554. Push changes to main: `git push origin main`
99565. Push tags to origin: `git push origin --tags`
99576. Switch branch to staging: `git checkout staging`
99587. Merge hotfix into staging: `git merge --no-ff hotfix/3.3.4/descriptive-message/creator's-name`
99598. Push changes to origin/staging: `git push origin staging`
99609. Delete hotfix branch: `git branch -d hotfix/3.3.4/optional-descriptive-message`
9961
9962The one exception to the rule here is that, **when a release branch currently exists, the hotfix changes need to be merged into that release branch, instead of** `staging`. Back-merging the bugfix into the __release__ branch will eventually result in the bugfix being merged into `staging` too, when the release branch is finished. (If work in `staging` immediately requires this bugfix and cannot wait for the release branch to be finished, you may safely merge the bugfix into `staging` now already as well.)
9963
9964Finally, we remove the temporary branch:
9965
9966- `git branch -d hotfix/3.3.4/optional-descriptive-message`
9967## Continuous Integration (CI) and Continuous Deployment (CD)
9968
9969Continuous Integration (CI) is a software development practice where members of a team integrate their work frequently. Each integration is verified by an automated build and test process to detect integration errors as quickly as possible. 
9970
9971Continuous Deployment (CD) is a software engineering approach in which software functionalities are delivered frequently through automated deployments.
9972
9973- **CircleCI job**: Create jobs in CircleCI to automate the merging of staging into main and release version (needed to release code) and building and testing Bittensor (needed to merge PRs).
9974
9975> It is highly recommended to set up your own circleci pipeline with your subnet
9976
9977## Versioning and Release Notes
9978
9979Semantic versioning helps keep track of the different versions of the software. When code is merged into main, generate a new version. 
9980
9981Release notes provide documentation for each version released to the users, highlighting the new features, improvements, and bug fixes. When merged into main, generate GitHub release and release notes.
9982
9983## Pending Tasks
9984
9985Follow these steps when you are contributing to the bittensor subnet:
9986
9987- Determine if main and staging are different
9988- Determine what is in staging that is not merged yet
9989    - Document not released developments
9990    - When merged into staging, generate information about what's merged into staging but not released.
9991    - When merged into main, generate GitHub release and release notes.
9992- CircleCI jobs 
9993    - Merge staging into main and release version (needed to release code)
9994    - Build and Test Bittensor (needed to merge PRs)
9995
9996This document can be improved as the Bittensor project continues to develop and change.
9997
9998
9999
10000---
10001File: /contrib/STYLE.md
10002---
10003
10004# Style Guide
10005
10006A project’s long-term success rests (among other things) on its maintainability, and a maintainer has few tools more powerful than his or her project’s log. It’s worth taking the time to learn how to care for one properly. What may be a hassle at first soon becomes habit, and eventually a source of pride and productivity for all involved.
10007
10008Most programming languages have well-established conventions as to what constitutes idiomatic style, i.e. naming, formatting and so on. There are variations on these conventions, of course, but most developers agree that picking one and sticking to it is far better than the chaos that ensues when everybody does their own thing.
10009
10010# Table of Contents
100111. [Code Style](#code-style)
100122. [Naming Conventions](#naming-conventions)
100133. [Git Commit Style](#git-commit-style)
100144. [The Six Rules of a Great Commit](#the-six-rules-of-a-great-commit)
10015   - [1. Atomic Commits](#1-atomic-commits)
10016   - [2. Separate Subject from Body with a Blank Line](#2-separate-subject-from-body-with-a-blank-line)
10017   - [3. Limit the Subject Line to 50 Characters](#3-limit-the-subject-line-to-50-characters)
10018   - [4. Use the Imperative Mood in the Subject Line](#4-use-the-imperative-mood-in-the-subject-line)
10019   - [5. Wrap the Body at 72 Characters](#5-wrap-the-body-at-72-characters)
10020   - [6. Use the Body to Explain What and Why vs. How](#6-use-the-body-to-explain-what-and-why-vs-how)
100215. [Tools Worth Mentioning](#tools-worth-mentioning)
10022   - [Using `--fixup`](#using---fixup)
10023   - [Interactive Rebase](#interactive-rebase)
100246. [Pull Request and Squashing Commits Caveats](#pull-request-and-squashing-commits-caveats)
10025
10026
10027### Code style
10028
10029#### General Style
10030Python's official style guide is PEP 8, which provides conventions for writing code for the main Python distribution. Here are some key points:
10031
10032- `Indentation:` Use 4 spaces per indentation level.
10033
10034- `Line Length:` Limit all lines to a maximum of 79 characters.
10035
10036- `Blank Lines:` Surround top-level function and class definitions with two blank lines. Method definitions inside a class are surrounded by a single blank line.
10037
10038- `Imports:` Imports should usually be on separate lines and should be grouped in the following order:
10039
10040    - Standard library imports.
10041    - Related third party imports.
10042    - Local application/library specific imports.
10043- `Whitespace:` Avoid extraneous whitespace in the following situations:
10044
10045    - Immediately inside parentheses, brackets or braces.
10046    - Immediately before a comma, semicolon, or colon.
10047    - Immediately before the open parenthesis that starts the argument list of a function call.
10048- `Comments:` Comments should be complete sentences and should be used to clarify code and are not a substitute for poorly written code.
10049
10050#### For Python
10051
10052- `List Comprehensions:` Use list comprehensions for concise and readable creation of lists.
10053
10054- `Generators:` Use generators when dealing with large amounts of data to save memory.
10055
10056- `Context Managers:` Use context managers (with statement) for resource management.
10057
10058- `String Formatting:` Use f-strings for formatting strings in Python 3.6 and above.
10059
10060- `Error Handling:` Use exceptions for error handling whenever possible.
10061
10062#### More details
10063
10064Use `black` to format your Python code before committing for consistency across such a large pool of contributors. Black's code [style](https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#code-style) ensures consistent and opinionated code formatting. It automatically formats your Python code according to the Black style guide, enhancing code readability and maintainability.
10065
10066Key Features of Black:
10067
10068- **Consistency:** Black enforces a single, consistent coding style across your project, eliminating style debates and allowing developers to focus on code logic.
10069
10070- **Readability:** By applying a standard formatting style, Black improves code readability, making it easier to understand and collaborate on projects.
10071
10072- **Automation:** Black automates the code formatting process, saving time and effort. It eliminates the need for manual formatting and reduces the likelihood of inconsistencies.
10073
10074### Naming Conventions
10075
10076- `Classes:` Class names should normally use the CapWords Convention.
10077- `Functions and Variables:` Function names should be lowercase, with words separated by underscores as necessary to improve readability. Variable names follow the same convention as function names.
10078
10079- `Constants:` Constants are usually defined on a module level and written in all capital letters with underscores separating words.
10080
10081- `Non-public Methods and Instance Variables:` Use a single leading underscore (_). This is a weak "internal use" indicator.
10082
10083- `Strongly "private" methods and variables:` Use a double leading underscore (__). This triggers name mangling in Python.
10084
10085
10086### Git commit style
10087
10088Here’s a model Git commit message when contributing:
10089```
10090Summarize changes in around 50 characters or less
10091
10092More detailed explanatory text, if necessary. Wrap it to about 72
10093characters or so. In some contexts, the first line is treated as the
10094subject of the commit and the rest of the text as the body. The
10095blank line separating the summary from the body is critical (unless
10096you omit the body entirely); various tools like `log`, `shortlog`
10097and `rebase` can get confused if you run the two together.
10098
10099Explain the problem that this commit is solving. Focus on why you
10100are making this change as opposed to how (the code explains that).
10101Are there side effects or other unintuitive consequences of this
10102change? Here's the place to explain them.
10103
10104Further paragraphs come after blank lines.
10105
10106 - Bullet points are okay, too
10107
10108 - Typically a hyphen or asterisk is used for the bullet, preceded
10109   by a single space, with blank lines in between, but conventions
10110   vary here
10111
10112If you use an issue tracker, put references to them at the bottom,
10113like this:
10114
10115Resolves: #123
10116See also: #456, #789
10117```
10118
10119
10120## The six rules of a great commit.
10121
10122#### 1. Atomic Commits
10123An “atomic” change revolves around one task or one fix.
10124
10125Atomic Approach
10126 - Commit each fix or task as a separate change
10127 - Only commit when a block of work is complete
10128 - Commit each layout change separately
10129 - Joint commit for layout file, code behind file, and additional resources
10130
10131Benefits
10132
10133- Easy to roll back without affecting other changes
10134- Easy to make other changes on the fly
10135- Easy to merge features to other branches
10136
10137#### Avoid trivial commit messages
10138
10139Commit messages like "fix", "fix2", or "fix3" don't provide any context or clear understanding of what changes the commit introduces. Here are some examples of good vs. bad commit messages:
10140
10141**Bad Commit Message:** 
10142
10143    $ git commit -m "fix"
10144
10145**Good Commit Message:**
10146
10147    $ git commit -m "Fix typo in README file"
10148
10149> **Caveat**: When working with new features, an atomic commit will often consist of multiple files, since a layout file, code behind file, and additional resources may have been added/modified. You don’t want to commit all of these separately, because if you had to roll back the application to a state before the feature was added, it would involve multiple commit entries, and that can get confusing.
10150
10151#### 2. Separate subject from body with a blank line
10152
10153Not every commit requires both a subject and a body. Sometimes a single line is fine, especially when the change is so simple that no further context is necessary. 
10154
10155For example:
10156
10157    Fix typo in introduction to user guide
10158
10159Nothing more need be said; if the reader wonders what the typo was, she can simply take a look at the change itself, i.e. use `git show`, `git diff`, or `git log -p`.
10160
10161If you’re committing something like this at the command line, it’s easy to use the -m option to git commit:
10162
10163    $ git commit -m"Fix typo in introduction to user guide"
10164
10165However, when a commit merits a bit of explanation and context, you need to write a body. For example:
10166
10167    Derezz the master control program
10168
10169    MCP turned out to be evil and had become intent on world domination.
10170    This commit throws Tron's disc into MCP (causing its deresolution)
10171    and turns it back into a chess game.
10172
10173Commit messages with bodies are not so easy to write with the -m option. You’re better off writing the message in a proper text editor. [See Pro Git](https://git-scm.com/book/en/v2/Customizing-Git-Git-Configuration).
10174
10175In any case, the separation of subject from body pays off when browsing the log. Here’s the full log entry:
10176
10177    $ git log
10178    commit 42e769bdf4894310333942ffc5a15151222a87be
10179    Author: Kevin Flynn <[email protected]>
10180    Date:   Fri Jan 01 00:00:00 1982 -0200
10181    
10182     Derezz the master control program
10183    
10184     MCP turned out to be evil and had become intent on world domination.
10185     This commit throws Tron's disc into MCP (causing its deresolution)
10186     and turns it back into a chess game.
10187
10188
10189#### 3. Limit the subject line to 50 characters
1019050 characters is not a hard limit, just a rule of thumb. Keeping subject lines at this length ensures that they are readable, and forces the author to think for a moment about the most concise way to explain what’s going on.
10191
10192GitHub’s UI is fully aware of these conventions. It will warn you if you go past the 50 character limit, and it will truncate any subject line longer than 72 characters with an ellipsis, so keeping it to 50 is best practice.
10193
10194#### 4. Use the imperative mood in the subject line
10195Imperative mood just means “spoken or written as if giving a command or instruction”. A few examples:
10196
10197    Clean your room
10198    Close the door
10199    Take out the trash
10200
10201Each of the six rules you’re reading about right now is written in the imperative (“Wrap the body at 72 characters”, etc.).
10202
10203The imperative can sound a little rude; that’s why we don’t often use it. But it’s perfect for Git commit subject lines. One reason for this is that Git itself uses the imperative whenever it creates a commit on your behalf.
10204
10205For example, the default message created when using git merge reads:
10206
10207    Merge branch 'myfeature'
10208
10209And when using git revert:
10210
10211    Revert "Add the thing with the stuff"
10212
10213    This reverts commit cc87791524aedd593cff5a74532befe7ab69ce9d.
10214
10215Or when clicking the “Merge” button on a GitHub pull request:
10216
10217    Merge pull request #123 from someuser/somebranch
10218
10219So when you write your commit messages in the imperative, you’re following Git’s own built-in conventions. For example:
10220
10221    Refactor subsystem X for readability
10222    Update getting started documentation
10223    Remove deprecated methods
10224    Release version 1.0.0
10225
10226Writing this way can be a little awkward at first. We’re more used to speaking in the indicative mood, which is all about reporting facts. That’s why commit messages often end up reading like this:
10227
10228    Fixed bug with Y
10229    Changing behavior of X
10230
10231And sometimes commit messages get written as a description of their contents:
10232
10233    More fixes for broken stuff
10234    Sweet new API methods
10235
10236To remove any confusion, here’s a simple rule to get it right every time.
10237
10238**A properly formed Git commit subject line should always be able to complete the following sentence:**
10239
10240    If applied, this commit will <your subject line here>
10241
10242For example:
10243
10244    If applied, this commit will refactor subsystem X for readability
10245    If applied, this commit will update getting started documentation
10246    If applied, this commit will remove deprecated methods
10247    If applied, this commit will release version 1.0.0
10248    If applied, this commit will merge pull request #123 from user/branch
10249
10250#### 5. Wrap the body at 72 characters
10251Git never wraps text automatically. When you write the body of a commit message, you must mind its right margin, and wrap text manually.
10252
10253The recommendation is to do this at 72 characters, so that Git has plenty of room to indent text while still keeping everything under 80 characters overall.
10254
10255A good text editor can help here. It’s easy to configure Vim, for example, to wrap text at 72 characters when you’re writing a Git commit.
10256
10257#### 6. Use the body to explain what and why vs. how
10258This [commit](https://github.com/bitcoin/bitcoin/commit/eb0b56b19017ab5c16c745e6da39c53126924ed6) from Bitcoin Core is a great example of explaining what changed and why:
10259
10260```
10261commit eb0b56b19017ab5c16c745e6da39c53126924ed6
10262Author: Pieter Wuille <[email protected]>
10263Date:   Fri Aug 1 22:57:55 2014 +0200
10264
10265   Simplify serialize.h's exception handling
10266
10267   Remove the 'state' and 'exceptmask' from serialize.h's stream
10268   implementations, as well as related methods.
10269
10270   As exceptmask always included 'failbit', and setstate was always
10271   called with bits = failbit, all it did was immediately raise an
10272   exception. Get rid of those variables, and replace the setstate
10273   with direct exception throwing (which also removes some dead
10274   code).
10275
10276   As a result, good() is never reached after a failure (there are
10277   only 2 calls, one of which is in tests), and can just be replaced
10278   by !eof().
10279
10280   fail(), clear(n) and exceptions() are just never called. Delete
10281   them.
10282```
10283
10284Take a look at the [full diff](https://github.com/bitcoin/bitcoin/commit/eb0b56b19017ab5c16c745e6da39c53126924ed6) and just think how much time the author is saving fellow and future committers by taking the time to provide this context here and now. If he didn’t, it would probably be lost forever.
10285
10286In most cases, you can leave out details about how a change has been made. Code is generally self-explanatory in this regard (and if the code is so complex that it needs to be explained in prose, that’s what source comments are for). Just focus on making clear the reasons why you made the change in the first place—the way things worked before the change (and what was wrong with that), the way they work now, and why you decided to solve it the way you did.
10287
10288The future maintainer that thanks you may be yourself!
10289
10290
10291
10292#### Tools worth mentioning
10293
10294##### Using `--fixup`
10295
10296If you've made a commit and then realize you've missed something or made a minor mistake, you can use the `--fixup` option. 
10297
10298For example, suppose you've made a commit with a hash `9fceb02`. Later, you realize you've left a debug statement in your code. Instead of making a new commit titled "remove debug statement" or "fix", you can do the following:
10299
10300    $ git commit --fixup 9fceb02
10301
10302This will create a new commit to fix the issue, with a message like "fixup! The original commit message".
10303
10304##### Interactive Rebase
10305
10306Interactive rebase, or `rebase -i`, can be used to squash these fixup commits into the original commits they're fixing, which cleans up your commit history. You can use the `autosquash` option to automatically squash any commits marked as "fixup" into their target commits.
10307
10308For example:
10309
10310    $ git rebase -i --autosquash HEAD~5
10311
10312This command starts an interactive rebase for the last 5 commits (`HEAD~5`). Any commits marked as "fixup" will be automatically moved to squash with their target commits.
10313
10314The benefit of using `--fixup` and interactive rebase is that it keeps your commit history clean and readable. It groups fixes with the commits they are related to, rather than having a separate "fix" commit that might not make sense to other developers (or even to you) in the future.
10315
10316
10317---
10318
10319#### Pull Request and Squashing Commits Caveats
10320
10321While atomic commits are great for development and for understanding the changes within the branch, the commit history can get messy when merging to the main branch. To keep a cleaner and more understandable commit history in our main branch, we encourage squashing all the commits of a PR into one when merging.
10322
10323This single commit should provide an overview of the changes that the PR introduced. It should follow the guidelines for atomic commits (an atomic commit is complete, self-contained, and understandable) but on the scale of the entire feature, task, or fix that the PR addresses. This approach combines the benefits of atomic commits during development with a clean commit history in our main branch.
10324
10325Here is how you can squash commits:
10326
10327```bash
10328git rebase -i HEAD~n
10329```
10330
10331where `n` is the number of commits to squash. After running the command, replace `pick` with `squash` for the commits you want to squash into the previous commit. This will combine the commits and allow you to write a new commit message.
10332
10333In this context, an atomic commit message could look like:
10334
10335```
10336Add feature X
10337
10338This commit introduces feature X which does A, B, and C. It adds 
10339new files for layout, updates the code behind the file, and introduces
10340new resources. This change is important because it allows users to 
10341perform task Y more efficiently. 
10342
10343It includes:
10344- Creation of new layout file
10345- Updates in the code-behind file
10346- Addition of new resources
10347
10348Resolves: #123
10349```
10350
10351In your PRs, remember to detail what the PR is introducing or fixing. This will be helpful for reviewers to understand the context and the reason behind the changes. 
10352
10353
10354
10355---
10356File: /docs/miners/finetuning.md
10357---
10358
10359# SWE Finetuning
10360
10361## Task Outline
10362
10363The task is to create a patch that fixes an issue in the repository. You will be provided the location to a repository and a description of the issue. This will be a real git repository as well as a real issue and you will be graded against the real patch. 
10364
10365### What is a patch?
10366
10367A patch is a list of edits to the repository. Each edit is an edit of a file, containing the file name, line number, line content, and new line content, as defined in the `Edit` and `Patch` classes below.
10368
10369```python
10370class Edit(BaseModel):
10371    file_name: str
10372    line_number: int
10373    line_content: str
10374    new_line_content: str
10375
10376class Patch(BaseModel):
10377    edits: list[Edit]
10378```
10379
10380## Things available to you
10381
10382### Packages
10383
10384You will have access to the modules in the `coding/constants.py` file in the `ALLOWED_MODULES` list. Along with specific imports from certain packages defined in the `coding/constants.py` file, in the `ALLOWED_IMPORTS` dictionary.
10385
10386### Size Limits
10387
10388You will have access to the `NUM_ALLOWED_CHARACTERS` variable in the `coding/constants.py` file. This is the maximum number of characters that can be used in your submission.
10389
10390### LLM Models
10391
10392You will have access to the following LLM models:
10393
10394- "gpt-4o"
10395- "gpt-3.5-turbo"
10396- "gpt-4o-mini"
10397- "claude-3-5-sonnet"
10398- "gemini-2.0-flash-exp"
10399
10400You will also have access to the following embedding models:
10401
10402- "text-embedding-3-small"
10403
10404#### How to use the models
10405
10406You can use the models by calling the `llm` property of the `SWEBase` class. For example:
10407
10408```python
10409from coding.finetune.swe-server.swebase import SWEBase
10410
10411swe = SWEBase()
10412response, tokens = swe.llm("gpt-4o", "What is the capital of France?")
10413embeddings = swe.llm.embed("What is the capital of France?")
10414```
10415
10416#### Reminders
10417
10418- The server that hosts your code does not allow internet access. Do not attempt to make network requests from your submission, as they will likely fail.
10419
10420## Submission
10421
10422Locate the `coding/miners/swe.py` file. This is where your miner will go to grab your submission.
10423
10424Your submission must initiate a class `SWE` that inherits from `SWEBase`. This will be called with a `repo_location` and `issue_description`. 
10425
10426The `SWE` class must return a `Patch` object. This will be used to evaluate your submission.
10427
10428## Testing
10429
10430Use the notebook `notebooks/sample-swe-task.ipynb` to test your submission.
10431
10432You need to verify your logic using the notebook `notebooks/logic-verification.ipynb`. 
10433
10434
10435
10436---
10437File: /docs/miners/quickstart.md
10438---
10439
10440# Quickstart to Mining
10441
10442## Installation
10443
10444
10445This repository requires python3.9 or higher. To install it, simply clone this repository and run the [install.sh](./install.sh) script.
10446```bash
10447git clone https://github.com/brokespace/code
10448cd code
10449python -m pip install --use-deprecated=legacy-resolver -r requirements.txt
10450python -m pip install --use-deprecated=legacy-resolver -e .
10451python -m pip uninstall uvloop # b/c it causes issues with threading/loops
10452```
10453
10454
10455## How to Run
10456You can use the following command to run a miner or a validator. 
10457
10458```bash
10459python <SCRIPT_PATH>
10460    --netuid 45
10461    --subtensor.network <finney/local/test>
10462    --neuron.device cuda
10463    --wallet.name <your wallet> # Must be created using the bittensor-cli
10464    --wallet.hotkey <your hotkey> # Must be created using the bittensor-cli
10465    --logging.debug # Run in debug mode, alternatively --logging.trace for trace mode
10466    --axon.port # VERY IMPORTANT: set the port to be one of the open TCP ports on your machine
10467```
10468
10469where `SCRIPT_PATH` is either: 
104701. neurons/miner.py
104712. neurons/validator.py
10472
10473For ease of use, you can run the scripts as well with PM2. Installation of PM2 is: 
10474**On Linux**:
10475```bash
10476sudo apt update && sudo apt install jq && sudo apt install npm && sudo npm install pm2 -g && pm2 update
10477``` 
10478
10479Example of running an openai miner:
10480
10481```bash
10482pm2 start neurons/miner.py --interpreter python3 --name miner -- --netuid XY  --subtensor.network finney --wallet.name coldkey --wallet.hotkey hotkey --neuron.model_id gpt4 --axon.port 8091 --logging.debug --miner.name openai
10483```
10484
10485## Subnet Wallet Registration
10486Register your wallet on the subnet: 
10487```
10488btcli s register --subtensor.network finney --netuid 45
10489```
10490
10491Testnet: 
10492```
10493btcli s register --subtensor.network test --netuid 171
10494```
10495
10496
10497# Testnet 
10498We highly recommend that you run your miners on testnet before deploying on main. This will give you an opportunity to debug your systems, and ensure that you will not lose valuable immunity time. The SN1 testnet is **netuid 171**. 
10499
10500In order to run on testnet, you will need to go through the same hotkey registration procedure as on main, but using **testtao**. You will need to ask for some in the community discord if you do not have any. 
10501
10502To run:
10503
10504```bash
10505pm2 start neurons/miner.py --interpreter python3 --name miner -- --netuid 171  --subtensor.network test --wallet.name test_coldkey --wallet.hotkey test_hotkey --neuron.model_id gpt4 --axon.port 8091 --logging.debug --miner.name openai
10506```
10507
10508
10509# Ramping up
10510
10511
10512## Tasks
10513
10514A list of the provided tasks can be seen [here](./tasks.md). Tasks are scored equally based on speed and similarity to the answer.
10515
10516## Sample Miners
10517
10518There are some sample miners you can use; check them out [here](./sample-miners.md). 
10519
10520
10521## Helpful Tips
10522
10523It is suggested that you play around with mining on Testnet before going to Mainnet.
10524If issues are encountered with btcli, it is recommended to use btcli v7.1.2 (https://github.com/opentensor/bittensor/commits/release/7.1.2/)
10525
10526
10527
10528
10529---
10530File: /docs/miners/sample-miners.md
10531---
10532
10533# Sample Miners
10534
10535
10536## Qwen Mistral Miner
10537
10538To get started on this miner you are going to want two models, `CodeQwen` and `Mistral`. This setup requires quite a bit of VRAM, I would suggest at a minimum 24gb of VRAM.
10539
10540
10541
10542### Starting LLM's
10543
10544Either use Python or Docker to start the LLMs. If using Docker you will need to get the [cuda container toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/1.13.5/install-guide.html).
10545
10546#### Using Python
10547Create a venv for VLLM. This venv must not be the same one you use to run the miner.
10548
10549```bash
10550python -m venv vllm
10551source vllm/bin/activate
10552pip install vllm
10553```
10554
10555Then start the LLM's
10556
10557```bash
10558pm2 start --name "mistral" "vllm serve thesven/Mistral-7B-Instruct-v0.3-GPTQ  --max-model-len 4096 --quantization gptq --dtype half --gpu-memory-utilization 0.40 --port 8001"
10559```
10560
10561```bash
10562pm2 start --name "qwen" "vllm serve Qwen/CodeQwen1.5-7B-AWQ  --max-model-len 4096 --quantization awq --dtype half --gpu-memory-utilization 0.40 --port 8000"
10563```
10564
10565#### Using Docker
10566
10567The commands below will run VLLM on device=0 (gpu0), be sure to modify that if you want to run on a different gpu
10568
10569```bash
10570sudo docker run -d -p 8000:8000 --gpus device=0 --ipc host --name codeqwen docker.io/vllm/vllm-openai:latest --model Qwen/CodeQwen1.5-7B-AWQ --max-model-len 8096 --dtype half  --gpu-memory-utilization 0.4
10571```
10572
10573```bash
10574sudo docker run -d -p 8001:8001  --gpus device=0 --ipc host --name mistral-instruct docker.io/vllm/vllm-openai:latest --model thesven/Mistral-7B-Instruct-v0.3-GPTQ --max-model-len 8912  --dtype half --gpu-memory-utilization 0.40
10575```
10576
10577
10578### Starting the Miner
10579
10580Exit the previous venv for vllm, either creating a new venv or using your default python interpreter.
10581
10582```
10583pm2 start neurons/miner.py --interpreter python3 --name miner -- --netuid 45 --subtensor.network finney --wallet.name coldkey --wallet.hotkey hotkey --neuron.model_id Qwen/CodeQwen1.5-7B-AWQ --axon.port 8091 --logging.debug --miner.name qwen_mistral
10584```
10585
10586
10587---
10588File: /docs/miners/tasks.md
10589---
10590
10591# Coding Tasks
10592
10593### 1. Completion
10594
10595The goal of this task is to complete the given code. You will be provided a function's name followed by "<|fim_hole|>" and you must complete the function body.
10596
10597The only protocol being sent is `query`, the expected response is the completed function body. 
10598
10599
10600### 2. Debugging
10601
10602This task is under development.
10603
10604### 3. Fill-In-The-Middle (FIM)
10605
10606The goal of this task is to fill in the middle of the given code. You will be provided a portion of code with a chunk missing. The chunk to be filled in is marked with "<|fim_hole|>". You should return the code to be placed in the filled in spot.
10607
10608The only protocol being sent is `query`, the expected response is the code to be placed in the "<|fim_hole|>".
10609
10610
10611### 4. Organic Convo
10612
10613This task is dynamic and is sent at random, using input from a user of the chat frontend. You will be sent a conversation from the chat frontend and are expected to return a good response.
10614
10615You will be provided `messages` and potentially some `files`. You must return with an appropriate response given the messages and files.
10616
10617
10618### 5. Repo
10619
10620In this task you will be sent a `query` containing a majority of the code from a file in a given repo, alongside that you will be given `files` containing the other files in the repo. Your goal is to use the files to complete the missing code in the query file.
10621
10622
10623### 6. Repo File 
10624
10625In this task you will be given a `query` containing a summary of what a python file did, and `files` containing some other files that came from the same repo. You are to write the entire python file given the summary and files. 
10626
10627### 7. SWE Task
10628
10629In this task you are given `files` and a `query` of the style:
10630
10631```
10632Given the following issue and files, please return a patch file that would fix the issue. An example of what you should return is
10633<patch> diff --git a/example.txt b/example.txt
10634index e69de29..d95f3ad 100644
10635--- a/example.txt
10636+++ b/example.txt
10637@@ -1,3 +1,3 @@
10638-Hello, world!
10639+Hello, universe!
10640 
10641 This is a simple text file.
10642-The end.
10643+Goodbye, world! </patch>
10644The following issue is:\n\n
10645
10646<INSERT ISSUE HERE>
10647```
10648
10649You must return a jsonified dictionary where the key is the filename and the value is the patch for that file. It does not have to be perfect as it will be parsed out and specific line numbers will be compared. 
10650
10651The LLM's response to the above prompt (fed in alongside the files) should be immediately parsable and returnable with the following code:
10652
10653```python
def parse_diff(diff_string):
    """
    Split a git-style diff into a ``{filename: hunk_text}`` dictionary.

    Rules applied to each line of ``diff_string``, in this order:

    * A line containing ``diff --git`` starts a new file; its last
      whitespace-separated token (e.g. ``b/example.txt``) is the key.
    * Lines starting with ``---`` or ``+++`` are dropped.
    * A line starting with ``@@`` is dropped and opens the hunk body.
    * Any other line is kept only once a hunk has been opened for the file.

    Files that never accumulate any hunk lines are not added to the result.
    """
    patches = {}
    active_file = None
    hunk_lines = []
    inside_hunk = False

    for text in diff_string.splitlines():
        if "diff --git" in text:
            # Store whatever was collected for the previous file, then reset.
            if active_file and hunk_lines:
                patches[active_file] = "\n".join(hunk_lines)
            active_file = text.split()[-1]
            hunk_lines = []
            inside_hunk = False
            continue

        # Old/new file path markers carry no patch content.
        if text.startswith(("---", "+++")):
            continue

        if text.startswith("@@"):
            inside_hunk = True
            continue

        if inside_hunk:
            hunk_lines.append(text)

    # The last file has no following "diff --git" header to trigger a store.
    if active_file and hunk_lines:
        patches[active_file] = "\n".join(hunk_lines)

    return patches
10681```
10682
10683
10684---
10685File: /docs/stream_tutorial/client.py
10686---
10687
10688import argparse
10689import asyncio
10690import bittensor as bt
10691
10692from protocol import StreamPrompting
10693
10694"""
10695This assumes you have:
106961. Registered your miner on the chain (finney/test)
106972. Are serving your miner on an open port (e.g. 12345)
10698
10699Steps:
10700- Instantiate your synapse subclass with the relevant information. E.g. messages, roles, etc.
10701- Instantiate your wallet and a dendrite client
10702- Query the dendrite client with your synapse object
10703- Iterate over the async generator to extract the yielded tokens on the server side
10704"""
10705
10706
async def query_synapse(my_uid, wallet_name, hotkey, network, netuid):
    """
    Send one streaming test prompt to the axon at ``my_uid`` and print the reply.

    Args:
        my_uid: Index into ``metagraph.axons`` of the axon to query.
        wallet_name: Wallet name passed to ``bt.wallet``.
        hotkey: Hotkey name passed to ``bt.wallet``.
        network: Network name passed to ``bt.metagraph``.
        netuid: Subnet uid passed to ``bt.metagraph``.

    Chunks that arrive as lists have their first element printed as they come
    in, with a line break emitted on every fifth chunk. Only the first
    response returned by the dendrite is consumed.
    """
    request = StreamPrompting(
        roles=["user"],
        messages=[
            "hello this is a test of a streaming response. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua."
        ],
    )

    # Wallet built from the provided wallet name and hotkey.
    wallet = bt.wallet(name=wallet_name, hotkey=hotkey)

    # Fully synced (non-lite) metagraph for the requested network / subnet.
    metagraph = bt.metagraph(
        netuid=netuid, network=network, sync=True, lite=False
    )

    # The axon being served under the given uid.
    target_axon = metagraph.axons[my_uid]

    # Dendrite handles the client side of the exchange.
    dendrite = bt.dendrite(wallet=wallet)

    streams = await dendrite(
        [target_axon], request, deserialize=False, streaming=True
    )

    for stream in streams:
        received = 0
        async for chunk in stream:
            received += 1
            if received % 5 == 0:
                print()
            if not isinstance(chunk, list):
                # The last object yielded is the synapse itself with the
                # completion filled in; there is nothing to print for it.
                continue
            print(chunk[0], end="", flush=True)
        # Only the first response is of interest.
        break
10749
10750
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Query a Bittensor synapse with given parameters."
    )

    # (flag, add_argument keyword options), registered in this order.
    argument_specs = (
        (
            "--my_uid",
            {
                "type": int,
                "required": True,
                "help": "Your unique miner ID on the chain",
            },
        ),
        (
            "--netuid",
            {"type": int, "required": True, "help": "Network Unique ID"},
        ),
        (
            "--wallet_name",
            {"type": str, "default": "default", "help": "Name of the wallet"},
        ),
        (
            "--hotkey",
            {"type": str, "default": "default", "help": "Hotkey for the wallet"},
        ),
        (
            "--network",
            {
                "type": str,
                "default": "test",
                "help": 'Network type, e.g., "test" or "mainnet"',
            },
        ),
    )
    for flag, options in argument_specs:
        parser.add_argument(flag, **options)

    args = parser.parse_args()

    # Drive the async query to completion with the parsed arguments.
    asyncio.run(
        query_synapse(
            my_uid=args.my_uid,
            wallet_name=args.wallet_name,
            hotkey=args.hotkey,
            network=args.network,
            netuid=args.netuid,
        )
    )
10792
10793
10794
10795---
10796File: /docs/stream_tutorial/config.py
10797---
10798
10799import bittensor as bt
10800import argparse
10801import os
10802
10803
def check_config(cls, config: "bt.Config"):
    """
    Validate the axon and logging config and create the miner's working directory.

    The directory is
    ``<logging.logging_dir>/<wallet name>/<wallet hotkey>/<miner.name>`` with
    ``~`` expanded; the resolved path is stored on ``config.miner.full_path``.

    Args:
        cls: Unused; accepted so existing callers that pass a class keep working.
        config: Configuration object to validate. ``config.miner.full_path``
            is set as a side effect.
    """
    bt.axon.check_config(config)
    bt.logging.check_config(config)

    wallet_name = config.wallet.get("name", bt.defaults.wallet.name)
    wallet_hotkey = config.wallet.get("hotkey", bt.defaults.wallet.hotkey)
    config.miner.full_path = os.path.expanduser(
        "{}/{}/{}/{}".format(
            config.logging.logging_dir,
            wallet_name,
            wallet_hotkey,
            config.miner.name,
        )
    )

    # exist_ok avoids the check-then-create race when two processes start at once.
    os.makedirs(config.miner.full_path, exist_ok=True)
10818
10819
def get_config() -> "bt.Config":
    """
    Build the miner configuration from command-line arguments.

    Registers the tutorial's own arguments, then the argument groups provided
    by ``bt.subtensor``, ``bt.logging``, ``bt.wallet`` and ``bt.axon``, parses
    them through ``bt.config`` and makes sure the logging directory exists.

    Returns:
        bt.Config: The parsed configuration, with ``full_path`` set to
        ``<logging_dir>/<wallet name>/<wallet hotkey>/netuid<netuid>/miner``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--axon.port", type=int, default=8098, help="Port to run the axon on."
    )
    # Subtensor network to connect to
    parser.add_argument(
        "--subtensor.network",
        default="finney",
        help="Bittensor network to connect to.",
    )
    # Chain endpoint to connect to
    parser.add_argument(
        "--subtensor.chain_endpoint",
        default="wss://entrypoint-finney.opentensor.ai:443",
        help="Chain endpoint to connect to.",
    )
    # Adds override arguments for network and netuid.
    parser.add_argument(
        "--netuid", type=int, default=1, help="The chain subnet uid."
    )

    parser.add_argument(
        "--miner.root",
        type=str,
        help="Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name ",
        default="~/.bittensor/miners/",
    )
    parser.add_argument(
        "--miner.name",
        type=str,
        help="Trials for this miner go in miner.root / (wallet_cold - wallet_hot) / miner.name ",
        default="Bittensor Miner",
    )

    # Run config.
    # Parsed as int: the value is compared against a difference of block
    # numbers, so a string coming from the command line would break that check.
    parser.add_argument(
        "--miner.blocks_per_epoch",
        type=int,
        help="Blocks until the miner repulls the metagraph from the chain",
        default=100,
    )

    # Switches.
    parser.add_argument(
        "--miner.no_serve",
        action="store_true",
        help="If True, the miner doesnt serve the axon.",
        default=False,
    )
    parser.add_argument(
        "--miner.no_start_axon",
        action="store_true",
        help="If True, the miner doesnt start the axon.",
        default=False,
    )

    # Mocks.
    parser.add_argument(
        "--miner.mock_subtensor",
        action="store_true",
        help="If True, the miner will allow non-registered hotkeys to mine.",
        default=False,
    )

    # Adds subtensor specific arguments i.e. --subtensor.chain_endpoint ... --subtensor.network ...
    bt.subtensor.add_args(parser)

    # Adds logging specific arguments i.e. --logging.debug ..., --logging.trace .. or --logging.logging_dir ...
    bt.logging.add_args(parser)

    # Adds wallet specific arguments i.e. --wallet.name ..., --wallet.hotkey ./. or --wallet.path ...
    bt.wallet.add_args(parser)

    # Adds axon specific arguments i.e. --axon.port ...
    bt.axon.add_args(parser)

    # Activating the parser to read any command-line inputs.
    # To print help message, run python3 template/miner.py --help
    config = bt.config(parser)

    # Logging captures events for diagnosis or understanding miner's behavior.
    config.full_path = os.path.expanduser(
        "{}/{}/{}/netuid{}/{}".format(
            config.logging.logging_dir,
            config.wallet.name,
            config.wallet.hotkey,
            config.netuid,
            "miner",
        )
    )
    # Ensure the directory for logging exists; exist_ok makes a prior
    # existence check unnecessary.
    os.makedirs(config.full_path, exist_ok=True)
    return config
10915
10916
10917
10918---
10919File: /docs/stream_tutorial/miner.py
10920---
10921
10922import copy
10923import time
10924import asyncio
10925import argparse
10926import threading
10927import traceback
10928from abc import ABC, abstractmethod
10929from functools import partial
10930from starlette.types import Send
10931
10932import bittensor as bt
10933from transformers import GPT2Tokenizer
10934from typing import List, Dict, Tuple, Union, Callable, Awaitable
10935
10936from protocol import StreamPrompting
10937from config import get_config, check_config
10938
10939
class StreamMiner(ABC):
    """
    Abstract base class for a streaming miner.

    The constructor builds the config, wallet, subtensor, metagraph and axon
    and attaches ``_prompt`` as the axon's forward function. ``run`` serves and
    starts the axon and then logs this miner's metagraph stats once per epoch.
    Subclasses provide ``config``, ``add_args`` and ``prompt``.

    The instance can be used as a context manager: entering starts ``run`` in
    a background thread, exiting asks that thread to stop.
    """

    def __init__(self, config=None, axon=None, wallet=None, subtensor=None):
        """
        Set up the miner's config, wallet, chain connection and axon.

        Args:
            config: Optional base config; ``get_config()`` is used when omitted.
            axon: Optional pre-built axon; one is created on ``config.axon.port`` when omitted.
            wallet: Optional pre-built wallet; one is created from the config when omitted.
            subtensor: Optional pre-built subtensor; one is created from the config when omitted.

        Raises:
            SystemExit: If the wallet's hotkey is not listed in the metagraph.
        """
        # Setup base config from get_config() and merge with subclassed config.
        base_config = copy.deepcopy(config or get_config())
        # NOTE: from here on `self.config` is the config object returned by the
        # subclass' `config()` method, no longer the method itself.
        self.config = self.config()
        self.config.merge(base_config)

        check_config(StreamMiner, self.config)
        bt.logging.info(self.config)  # TODO: duplicate print?

        self.prompt_cache: Dict[str, Tuple[str, int]] = {}

        # Activating Bittensor's logging with the set configurations.
        bt.logging.set_config(config=self.config.logging)

        # Wallet holds cryptographic information, ensuring secure transactions and communication.
        self.wallet = wallet or bt.wallet(config=self.config)
        bt.logging.info(f"Wallet {self.wallet}")

        # subtensor manages the blockchain connection, facilitating interaction with the Bittensor blockchain.
        self.subtensor = subtensor or bt.subtensor(config=self.config)
        bt.logging.info(f"Subtensor: {self.subtensor}")
        bt.logging.info(
            f"Running miner for subnet: {self.config.netuid} on network: {self.subtensor.chain_endpoint} with config:"
        )

        # metagraph provides the network's current state, holding state about other participants in a subnet.
        self.metagraph = self.subtensor.metagraph(self.config.netuid)
        bt.logging.info(f"Metagraph: {self.metagraph}")

        if self.wallet.hotkey.ss58_address not in self.metagraph.hotkeys:
            bt.logging.error(
                f"\nYour miner: {self.wallet} is not registered to chain connection: {self.subtensor} \nRun btcli register and try again. "
            )
            # Non-zero status: this is a fatal start-up error.
            raise SystemExit(1)

        # Each miner gets a unique identity (UID) in the network for differentiation.
        self.my_subnet_uid = self.metagraph.hotkeys.index(
            self.wallet.hotkey.ss58_address
        )
        bt.logging.info(f"Running miner on uid: {self.my_subnet_uid}")

        # The axon handles request processing, allowing validators to send this process requests.
        self.axon = axon or bt.axon(
            wallet=self.wallet, port=self.config.axon.port
        )
        # Attach determines which function is called when servicing a request.
        bt.logging.info("Attaching forward function to axon.")
        self.axon.attach(
            forward_fn=self._prompt,
        )
        bt.logging.info(f"Axon created: {self.axon}")

        # Instantiate runners
        self.should_exit: bool = False
        self.is_running: bool = False
        self.thread: Union[threading.Thread, None] = None
        self.lock = asyncio.Lock()
        self.request_timestamps: Dict = {}

    @abstractmethod
    def config(self) -> "bt.Config":
        """Return the subclass-specific configuration object."""
        ...

    @classmethod
    @abstractmethod
    def add_args(cls, parser: argparse.ArgumentParser):
        """Add subclass-specific command-line arguments to ``parser``."""
        ...

    def _prompt(self, synapse: StreamPrompting) -> StreamPrompting:
        """
        Forward function attached to the axon; delegates to the subclass' `prompt`.

        This wrapper is the hook for pre-processing shared by all subclasses.
        It currently performs none: the request is passed straight through to
        `prompt` (``self.prompt_cache`` is not consulted here).

        Args:
            synapse (StreamPrompting): The incoming request object encapsulating the details of the request.

        Returns:
            StreamPrompting: Whatever the subclass' `prompt` returns for this request.

        Example:
            This method is not meant to be called directly but is invoked internally when a request
            is received, and it subsequently calls the `prompt` method of the subclass.
        """
        return self.prompt(synapse)

    @abstractmethod
    def prompt(self, synapse: StreamPrompting) -> StreamPrompting:
        """
        Abstract method to handle and respond to incoming requests to the miner.

        Subclasses should implement this method to define their custom logic for processing and
        responding to requests. This method is designed to be overridden, and its behavior will
        be dependent on the specific implementation provided in the subclass.

        Args:
            synapse (StreamPrompting): The incoming request object encapsulating the details
                of the request. This must contain `messages` and `roles` as fields.

        Returns:
            StreamPrompting: The response object that should be sent back in reply to the
                incoming request. This is essentially the filled synapse request object.

        Example:
            class CustomMiner(Miner):
                def prompt(self, synapse: StreamPrompting) -> StreamPrompting:
                    # Custom logic to process and respond to the request.
                    synapse.completion = "The meaning of life is 42."
                    return synapse
        """
        ...

    def run(self):
        """
        Runs the miner logic. This method starts the miner's operations, including
        listening for incoming requests and periodically logging this miner's
        entry in a freshly fetched metagraph.

        Raises:
            SystemExit: If the hotkey is not registered on the subnet, or after
                a keyboard interrupt has stopped the axon.
        """
        if not self.subtensor.is_hotkey_registered(
            netuid=self.config.netuid,
            hotkey_ss58=self.wallet.hotkey.ss58_address,
        ):
            bt.logging.error(
                f"Wallet: {self.wallet} is not registered on netuid {self.config.netuid}. "
                f"Please register the hotkey using `btcli subnets register` before trying again"
            )
            raise SystemExit(1)

        # Serve passes the axon information to the network + netuid we are hosting on.
        bt.logging.info(
            f"Serving axon {StreamPrompting} on network: {self.config.subtensor.chain_endpoint} with netuid: {self.config.netuid}"
        )
        self.axon.serve(netuid=self.config.netuid, subtensor=self.subtensor)

        # Start the miner's axon, making it active on the network.
        bt.logging.info(
            f"Starting axon server on port: {self.config.axon.port}"
        )
        self.axon.start()

        # --- Run until should_exit = True.
        self.last_epoch_block = self.subtensor.get_current_block()
        bt.logging.info(f"Miner starting at block: {self.last_epoch_block}")

        # This loop maintains the miner's operations until intentionally stopped.
        bt.logging.info("Starting main loop")
        step = 0
        try:
            while not self.should_exit:
                # --- Wait until next epoch.
                current_block = self.subtensor.get_current_block()
                while (
                    current_block - self.last_epoch_block
                    < self.config.miner.blocks_per_epoch
                ):
                    # --- Wait for next block.
                    time.sleep(1)
                    current_block = self.subtensor.get_current_block()

                    # --- Check if we should exit.
                    if self.should_exit:
                        break

                # A stop request during the wait should not trigger another
                # round of chain queries.
                if self.should_exit:
                    break

                # --- Fetch the latest network state for this epoch's log line.
                self.last_epoch_block = self.subtensor.get_current_block()

                metagraph = self.subtensor.metagraph(
                    netuid=self.config.netuid,
                    lite=True,
                    block=self.last_epoch_block,
                )
                log = (
                    f"Step:{step} | "
                    f"Block:{metagraph.block.item()} | "
                    f"Stake:{metagraph.S[self.my_subnet_uid]} | "
                    f"Rank:{metagraph.R[self.my_subnet_uid]} | "
                    f"Trust:{metagraph.T[self.my_subnet_uid]} | "
                    f"Consensus:{metagraph.C[self.my_subnet_uid]} | "
                    f"Incentive:{metagraph.I[self.my_subnet_uid]} | "
                    f"Emission:{metagraph.E[self.my_subnet_uid]}"
                )
                bt.logging.info(log)

                step += 1

        # If someone intentionally stops the miner, it'll safely terminate operations.
        except KeyboardInterrupt:
            self.axon.stop()
            bt.logging.success("Miner killed by keyboard interrupt.")
            raise SystemExit(0)

        # Unforeseen errors are logged; the loop is not re-entered, so this
        # method returns afterwards.
        except Exception:
            bt.logging.error(traceback.format_exc())

    def run_in_background_thread(self):
        """
        Starts the miner's operations in a separate background thread.
        This is useful for non-blocking operations.
        """
        if not self.is_running:
            bt.logging.debug("Starting miner in background thread.")
            self.should_exit = False
            self.thread = threading.Thread(target=self.run, daemon=True)
            self.thread.start()
            self.is_running = True
            bt.logging.debug("Started")

    def stop_run_thread(self):
        """
        Stops the miner's operations that are running in the background thread.

        Waits up to 5 seconds for the thread to finish.
        """
        if self.is_running:
            bt.logging.debug("Stopping miner in background thread.")
            self.should_exit = True
            self.thread.join(5)
            self.is_running = False
            bt.logging.debug("Stopped")

    def __enter__(self):
        """
        Starts the miner's operations in a background thread upon entering the context.
        This method facilitates the use of the miner in a 'with' statement.

        Returns:
            The miner itself, so ``with Miner() as miner:`` binds the instance.
        """
        self.run_in_background_thread()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        Stops the miner's background operations upon exiting the context.
        This method facilitates the use of the miner in a 'with' statement.

        Args:
            exc_type: The type of the exception that caused the context to be exited.
                      None if the context was exited without an exception.
            exc_value: The instance of the exception that caused the context to be exited.
                       None if the context was exited without an exception.
            traceback: A traceback object encoding the stack trace.
                       None if the context was exited without an exception.
        """
        self.stop_run_thread()
11192
11193
class StreamingTemplateMiner(StreamMiner):
    """
    Example streaming miner that echoes the first incoming message back to the
    client a few GPT-2 tokens at a time.
    """

    def config(self) -> "bt.Config":
        """
        Returns the configuration object specific to this miner.

        Implement and extend this method to provide custom configurations for the miner.
        Currently, it sets up a basic configuration parser.

        Returns:
            bt.Config: A configuration object with the miner's operational parameters.
        """
        parser = argparse.ArgumentParser(description="Streaming Miner Configs")
        self.add_args(parser)
        return bt.config(parser)

    @classmethod
    def add_args(cls, parser: argparse.ArgumentParser):
        """
        Adds custom arguments to the command line parser.

        Developers can introduce additional command-line arguments specific to the miner's
        functionality in this method. These arguments can then be used to configure the miner's operation.

        Args:
            parser (argparse.ArgumentParser):
                The command line argument parser to which custom arguments should be added.
        """
        pass

    def prompt(self, synapse: StreamPrompting) -> StreamPrompting:
        """
        Generates a streaming response for the provided synapse.

        This function serves as the main entry point for handling streaming prompts. It takes
        the incoming synapse which contains messages to be processed and returns a streaming
        response. The function uses the GPT-2 tokenizer and a simulated model to tokenize and decode
        the incoming message, and then sends the response back to the client token by token.

        Only the first entry of ``synapse.messages`` is processed.

        Args:
            synapse (StreamPrompting): The incoming StreamPrompting instance containing the messages to be processed.

        Returns:
            StreamPrompting: The streaming response object which can be used by other functions to
                            stream back the response to the client.

        Usage:
            This function can be extended and customized based on specific requirements of the
            miner. Developers can swap out the tokenizer, model, or adjust how streaming responses
            are generated to suit their specific applications.
        """
        bt.logging.trace("HI. PROMPT()")
        # NOTE: the tokenizer is loaded anew on every call to `prompt`.
        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

        # Simulated function to decode token IDs into strings. In a real-world scenario,
        # this can be replaced with an actual model inference step.
        def model(ids):
            return (tokenizer.decode(token_id) for token_id in ids)

        async def _prompt(text: str, send: Send):
            """
            Asynchronously processes the input text and sends back tokens as a streaming response.

            This function takes an input text, tokenizes it using the GPT-2 tokenizer, and then
            uses the simulated model to decode token IDs into strings. It then sends each token
            back to the client as a streaming response, with a delay between tokens to simulate
            the effect of real-time streaming.

            Args:
                text (str): The input text message to be processed.
                send (Send): An asynchronous function that allows sending back the streaming response.

            Usage:
                This function can be adjusted based on the streaming requirements, speed of
                response, or the model being used. Developers can also introduce more sophisticated
                processing steps or modify how tokens are sent back to the client.
            """
            bt.logging.trace("HI. _PROMPT()")
            # squeeze(0) drops only the leading batch dimension, so a
            # single-token input stays a 1-D sequence that can be iterated.
            input_ids = tokenizer(
                text, return_tensors="pt"
            ).input_ids.squeeze(0)
            buffer = []
            bt.logging.debug(f"Input text: {text}")
            bt.logging.debug(f"Input ids: {input_ids}")

            N = 3  # Number of tokens to send back to the client at a time
            for token in model(input_ids):
                bt.logging.trace(f"appending token: {token}")
                buffer.append(token)
                # If buffer has N tokens, send them back to the client.
                if len(buffer) == N:
                    # Non-blocking pause: a blocking sleep here would stall
                    # every other request handled by the same event loop.
                    await asyncio.sleep(0.1)
                    joined_buffer = "".join(buffer)
                    bt.logging.debug(f"sedning tokens: {joined_buffer}")
                    await send(
                        {
                            "type": "http.response.body",
                            "body": joined_buffer.encode("utf-8"),
                            "more_body": True,
                        }
                    )
                    bt.logging.debug(f"Streamed tokens: {joined_buffer}")
                    buffer = []  # Clear the buffer for next batch of tokens

            # Send any remaining tokens in the buffer
            if buffer:
                joined_buffer = "".join(buffer)
                await send(
                    {
                        "type": "http.response.body",
                        "body": joined_buffer.encode("utf-8"),
                        "more_body": False,  # No more tokens to send
                    }
                )
                bt.logging.trace(f"Streamed tokens: {joined_buffer}")

        message = synapse.messages[0]
        bt.logging.trace(f"message in _prompt: {message}")
        token_streamer = partial(_prompt, message)
        bt.logging.trace(f"token streamer: {token_streamer}")
        return synapse.create_streaming_response(token_streamer)
11313
11314
# Script entry point: run the miner until the process is interrupted.
if __name__ == "__main__":
    # Entering the context starts the miner in its background thread; the
    # main thread just idles so the process stays alive.
    miner = StreamingTemplateMiner()
    with miner:
        while True:
            time.sleep(1)
11320
11321
11322
11323---
11324File: /docs/stream_tutorial/protocol.py
11325---
11326
11327import pydantic
11328import bittensor as bt
11329
11330from abc import ABC, abstractmethod
11331from typing import List, Union, Callable, Awaitable
11332from starlette.responses import StreamingResponse
11333
11334
class StreamPrompting(bt.StreamingSynapse):
    """
    Streaming synapse that carries prompt messages and accumulates the streamed reply.

    The request side of the synapse is described by `roles` and `messages`; the
    response side is `completion`, which is filled in piece by piece while the
    streamed body is consumed by `process_streaming_response`.

    Attributes:
    - `roles` (List[str]): Roles involved in the prompting scenario. Required;
                           declared with `allow_mutation=False`.

    - `messages` (List[str]): The prompts/messages themselves. Required; declared
                              with `allow_mutation=False`.

    - `required_hash_fields` (List[str]): Names of the fields required for the hash.
                                          Defaults to `["messages"]`; declared with
                                          `allow_mutation=False`.

    - `completion` (str): Text accumulated from the streamed response. Starts as the
                          empty string and is the only mutable field.

    Methods:
    - `process_streaming_response`: Async generator that decodes each body chunk,
                                    splits it on newlines, appends the non-empty
                                    pieces to `completion`, and yields the pieces.

    - `deserialize`: Returns `completion`.

    - `extract_response_json`: Builds a plain dict from the response headers plus
                               this synapse's `roles`, `messages` and `completion`.
    """

    # Required request field: who is speaking in each message.
    roles: List[str] = pydantic.Field(
        ...,
        title="Roles",
        description="A list of roles in the StreamPrompting scenario. Immuatable.",
        allow_mutation=False,
    )

    # Required request field: the prompt text(s).
    messages: List[str] = pydantic.Field(
        ...,
        title="Messages",
        description="A list of messages in the StreamPrompting scenario. Immutable.",
        allow_mutation=False,
    )

    # Field names that take part in the hash; only `messages` by default.
    required_hash_fields: List[str] = pydantic.Field(
        ["messages"],
        title="Required Hash Fields",
        description="A list of required fields for the hash.",
        allow_mutation=False,
    )

    # Response field: grows as streamed chunks are processed.
    completion: str = pydantic.Field(
        "",
        title="Completion",
        description="Completion status of the current StreamPrompting object. This attribute is mutable and can be updated.",
    )

    async def process_streaming_response(self, response: StreamingResponse):
        """
        Consume the streamed response body, accumulating text into `completion`.

        Each chunk obtained from `response.content.iter_any()` is decoded as UTF-8
        and split on newline characters. Every non-empty piece is appended to
        `self.completion` (the newlines themselves are not kept), and the full list
        of pieces for that chunk, including empty ones, is yielded to the caller.
        Exactly one list is yielded per chunk.

        Decoding is stateful: chunk boundaries are arbitrary byte offsets, so a
        multi-byte UTF-8 character may be split across two chunks. The incomplete
        tail of one chunk is held back and completed by the next one instead of
        raising.

        Args:
            response: The streaming response to read. NOTE(review): annotated as
                      starlette's `StreamingResponse`, but the code relies on
                      `response.content.iter_any()` -- presumably an aiohttp client
                      response; confirm against the caller.

        Yields:
            List[str]: The newline-separated pieces of each decoded chunk.

        Raises:
            UnicodeDecodeError: If the body contains invalid UTF-8, or ends in the
                                middle of a multi-byte character.
        """
        # Local import so this block does not depend on the module's import list.
        import codecs

        if self.completion is None:
            self.completion = ""
        bt.logging.debug(
            "Processing streaming response (StreamingSynapse base class)."
        )
        # One decoder for the whole stream so partial characters survive
        # chunk boundaries.
        decoder = codecs.getincrementaldecoder("utf-8")()
        async for chunk in response.content.iter_any():
            bt.logging.debug(f"Processing chunk: {chunk}")
            tokens = decoder.decode(chunk).split("\n")
            for token in tokens:
                bt.logging.debug(f"--processing token: {token}")
            # Empty pieces (produced by consecutive or trailing newlines, or by a
            # chunk that held only part of a character) contribute nothing.
            self.completion += "".join(token for token in tokens if token)
            bt.logging.debug(f"yielding tokens {tokens}")
            yield tokens
        # Surface a truncated trailing character instead of silently dropping it.
        decoder.decode(b"", final=True)

    def deserialize(self) -> str:
        """
        Deserializes the response by returning the completion attribute.

        Returns:
            str: The completion accumulated so far.
        """
        return self.completion

    def extract_response_json(self, response: StreamingResponse) -> dict:
        """
        Build a plain dictionary describing the response and this synapse.

        The raw header pairs are read from `response.__dict__["_raw_headers"]` and
        both names and values are decoded as UTF-8. Top-level entries fall back to
        `""` / `0` when the corresponding header is absent.

        Args:
            response: The response object whose raw headers are inspected.

        Returns:
            dict: A dictionary containing:
                - `name`, `timeout` (float), `total_size` (int), `header_size` (int)
                  taken from the headers of the same names.
                - `dendrite` and `axon`: sub-dicts built from every header whose name
                  starts with `bt_header_dendrite` / `bt_header_axon`.
                - `roles`, `messages` and `completion` of this instance.

        Raises:
            KeyError: If the response object has no `_raw_headers` entry.
            ValueError: If a numeric header cannot be converted by `float`/`int`.
        """
        headers = {
            k.decode("utf-8"): v.decode("utf-8")
            for k, v in response.__dict__["_raw_headers"]
        }

        def extract_info(prefix):
            # Keyed by the text after the LAST underscore of the header name.
            # NOTE(review): a multi-word suffix therefore collapses to its final
            # word (e.g. `..._status_code` becomes `code`) -- confirm this is what
            # the consumer expects before changing it.
            return {
                key.split("_")[-1]: value
                for key, value in headers.items()
                if key.startswith(prefix)
            }

        return {
            "name": headers.get("name", ""),
            "timeout": float(headers.get("timeout", 0)),
            "total_size": int(headers.get("total_size", 0)),
            "header_size": int(headers.get("header_size", 0)),
            "dendrite": extract_info("bt_header_dendrite"),
            "axon": extract_info("bt_header_axon"),
            "roles": self.roles,
            "messages": self.messages,
            "completion": self.completion,
        }
11481
11482
11483
11484---
11485File: /docs/stream_tutorial/README.md
11486---
11487
11488# Bittensor Streaming Tutorial
11489This document is intended as a developer-friendly walkthrough of integrating streaming into your bittensor application.
11490
11491If you prefer to jump right into a complete stand-alone example, see:
11492- `miner.py`
11493- `protocol.py`
11494- `client.py`
11495
11496Start your miner:
11497```bash
11498python miner.py --netuid 8 --wallet.name default --wallet.hotkey miner --subtensor.network test --axon.port 10000 --logging.trace
11499```
11500
11501Run the client:
11502```bash
11503python client.py --netuid 8 --my_uid 1 --network test
11504```
11505
11506## Overview
11507This tutorial is designed to show you how to use the streaming API to integrate into your application. It will cover the following topics:
11508- writing your streaming protocol (inherits from bittensor.StreamingSynapse)
11509- writing your streaming server (uses your streaming protocol)
11510- writing your streaming client (uses your streaming protocol)
11511
11512### Defining your streaming protocol
11513When designing your protocol, it would be helpful to look at the bittensor.StreamingSynapse for reference. Below is a condensed snippet of the abstract methods that you will need to implement in your subclass.
11514
11515You will need to implement two methods:
11516
11517- `process_streaming_response`
11518- `extract_response_json`
11519
11520These two methods are the core of your streaming protocol. The first method, `process_streaming_response`, is called as the response is being streamed from the network. It is responsible for handling the streaming response, such as parsing and accumulating data. The second method, `extract_response_json`, is called after the response has been processed and is responsible for retrieving structured data to be post-processed in the dendrite in bittensor core code.
11521
11522```python
11523class StreamingSynapse(bittensor.Synapse, ABC):
11524    ...
11525    class BTStreamingResponse(_StreamingResponse):
11526        ...
11527    @abstractmethod
11528    async def process_streaming_response(self, response: Response):
11529        """
11530        Abstract method that must be implemented by the subclass.
11531        This method should provide logic to handle the streaming response, such as parsing and accumulating data.
11532        It is called as the response is being streamed from the network, and should be implemented to handle the specific
11533        streaming data format and requirements of the subclass.
11534
11535        Args:
11536            response: The response object to be processed, typically containing chunks of data.
11537        """
11538        ...
11539
11540    @abstractmethod
11541    def extract_response_json(self, response: Response) -> dict:
11542        """
11543        Abstract method that must be implemented by the subclass.
11544        This method should provide logic to extract JSON data from the response, including headers and content.
11545        It is called after the response has been processed and is responsible for retrieving structured data
11546        that can be used by the application.
11547
11548        Args:
11549            response: The response object from which to extract JSON data.
11550        """
11551        ...
11552    ...
11553```
11554
11555See the full reference code at the bittensor [repo](https://github.com/opentensor/bittensor/blob/master/bittensor/stream.py).
11556
11557
11558#### Create your protocol
11559Let's walk through how to create a protocol using the bittensor.StreamingSynapse class.
11560```python
11561class MyStreamingSynapse(bt.StreamingSynapse):
11562    # define your expected data fields here as pydantic field objects
11563    # This allows you to control what information is passed along the network
11564    messages: List[str] = pydantic.Field(
11565        ..., # this ellipsis (...) indicates the object is required
11566        title="Messages", # What is the name of this field?
11567        description="A list of messages in the Prompting scenario. Immutable.",
11568        allow_mutation=False, # disallow modification of this field after creation
11569    )
11570    completion: str = pydantic.Field(
11571        "",
11572        title="Completion",
11573    )
11574    # add fields as necessary
11575    ...
11576
11577    # This method controls how your synapse is deserialized from the network
11578    # E.g. you can extract whatever information you want to receive at the final
11579    # yield in the async generator returned by the server, without receiving
11580    # the entire synapse object itself.
11581    # In this example, we just want the completion string at the end.
11582    def deserialize(self) -> str:
11583        return self.completion
11584
11585    # implement your `process_streaming_response` logic to actually yield objects to the streamer
11586    # this effectively defines the async generator that you'll receive on the client side
11587    async def process_streaming_response(self, response: MyStreamingSynapse):
11588        # this is an example of how you might process a streaming response
11589        # iterate over the response content and yield each line
11590        async for chunk in response.content.iter_any():
11591            tokens = chunk.decode("utf-8").split("\n")
11592            yield tokens
11593    
11594    # implement `extract_response_json` to extract the JSON data from the response headers
11595    # this will be dependent on the data you are streaming and how you want to structure it
11596    # it MUST conform to the following format expected by the bittensor dendrite:
11597    """
11598        {
11599            # METADATA AND HEADERS
11600            "name": ...,
11601            "timeout": float(...),
11602            "total_size": int(...),
11603            "header_size": int(...),
11604            "dendrite": ...,
11605            "axon": ...,
11606            # YOUR FIELDS
11607            "messages": self.messages,
11608            ...
11609        }
11610    """
11611    def extract_response_json(self, response: MyStreamingSynapse) -> dict:
11612        # iterate over the response headers and extract the necessary data
11613        headers = {
11614            k.decode("utf-8"): v.decode("utf-8")
11615            for k, v in response.__dict__["_raw_headers"]
11616        }
11617        # helper function to extract data from headers
11618        def extract_info(prefix):
11619            return {
11620                key.split("_")[-1]: value
11621                for key, value in headers.items()
11622                if key.startswith(prefix)
11623            }
11624        # return the extracted data in the expected format
11625        return {
11626            "name": headers.get("name", ""),
11627            "timeout": float(headers.get("timeout", 0)),
11628            "total_size": int(headers.get("total_size", 0)),
11629            "header_size": int(headers.get("header_size", 0)),
11630            "dendrite": extract_info("bt_header_dendrite"), # dendrite info
11631            "axon": extract_info("bt_header_axon"), # axon info
11632            "messages": self.messages, # field object
11633        }
11634```
11635
11636[Here](https://github.com/opentensor/text-prompting/blob/main/prompting/protocol.py#L131) is a full example implementation of a streaming protocol based on the text-prompting network.
11637
11638Please read the docstrings provided, they can be very helpful!
11639
11640### Writing the server
11641Great! Now we have our protocol defined, let's see how to define our server.
11642This will generate the tokens to be streamed in this prompting example.
11643
11644For brevity we will not be building a full miner, but inspecting the central components.
11645```python
11646class MyStreamPromptingMiner(bt.Miner):
11647    ... # any relevant methods you'd need for your miner
11648
11649    # define your server forward here
11650    # NOTE: It is crucial that your typehints are correct and reflect your streaming protocol object
11651    # otherwise the axon will reject adding your route to the server.
11652    def forward(self, synapse: MyStreamingSynapse) -> MyStreamingSynapse:
11653        # Let's use a GPT2 tokenizer for this toy example
11654        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
11655
11656        # Simulated function to decode token IDs into strings. In a real-world scenario,
11657        # this can be replaced with an actual model inference step.
11658        def model(ids):
11659            return (tokenizer.decode(id) for id in ids)
11660
11661        # This function is called asynchronously to process the input text and send back tokens
11662        # as a streaming response. It essentially produces the async generator that will be
11663        # consumed by the client with an `async for` loop.
11664        async def _forward(text: str, send: Send):
11665            # `text` may be the input prompt to your model in a real-world scenario.
11666            # let's tokenize them into IDs for the sake of this example.
11667            input_ids = tokenizer(text, return_tensors="pt").input_ids.squeeze()
11668            
11669            # You may want to buffer your tokens before sending them back to the client.
11670            # this can be useful so we aren't flooding the client with individual tokens
11671            # and allows you more fine-grained control over how much data is sent back 
11672            # with each yield.
11673            N = 3  # Number of tokens to send back to the client at a time
11674            buffer = []
11675            # Iterate over the tokens and send the generated tokens back to the client
11676            # when we have sufficient (N) tokens in the buffer.       
11677            for token in model(input_ids):
11678                buffer.append(token) # Add token to buffer
11679
11680                # If buffer has N tokens, send them back to the client.
11681                if len(buffer) == N:
11682                    joined_buffer = "".join(buffer)
11683                    # Send the tokens back to the client
11684                    # This is the core of the streaming response and the format 
11685                    # is important. The `send` function is provided by the ASGI server
11686                    # and is responsible for sending the response back to the client.
11687                    # This buffer will be received by the client as a single chunk of
11688                    # data, which can then be split into individual tokens!
11689                    await send(
11690                        {
11691                            "type": "http.response.body",
11692                            "body": joined_buffer.encode("utf-8"),
11693                            "more_body": True,
11694                        }
11695                    )
11696                    buffer = []  # Clear the buffer for next batch of tokens
11697
11698        # Create a streaming response object using the `_forward` function
11699        # It is useful to wrap your _forward function in a partial function
11700        # to pass in the text argument lazily.
11701        token_streamer = partial(_forward, synapse.messages[0])
11702        # Return the streaming response object, which is an instance of the
11703        # `BTStreamingResponse` class.
11704        return synapse.create_streaming_response(token_streamer)
11705```
11706
11707#### Complete Example
11708Here is a full example for reference:
11709> This inherits from the prompting (text-prompting) miner base class.
11710> Take a look at the `prompting/baseminer/miner.py` file [here](https://github.com/opentensor/text-prompting/blob/main/prompting/baseminer/miner.py) for more details.
11711
11712```python
11713class StreamingTemplateMiner(prompting.Miner):
11714    def config(self) -> "bt.Config":
11715        """
11716        Returns the configuration object specific to this miner.
11717
11718        Implement and extend this method to provide custom configurations for the miner.
11719        Currently, it sets up a basic configuration parser.
11720
11721        Returns:
11722            bt.Config: A configuration object with the miner's operational parameters.
11723        """
11724        parser = argparse.ArgumentParser(description="Streaming Miner Configs")
11725        self.add_args(parser)
11726        return bt.config(parser)
11727
11728    def add_args(cls, parser: argparse.ArgumentParser):
11729        """
11730        Adds custom arguments to the command line parser.
11731
11732        Developers can introduce additional command-line arguments specific to the miner's
11733        functionality in this method. These arguments can then be used to configure the miner's operation.
11734
11735        Args:
11736            parser (argparse.ArgumentParser):
11737                The command line argument parser to which custom arguments should be added.
11738        """
11739        pass
11740
11741    def prompt(self, synapse: StreamPrompting) -> StreamPrompting:
11742        """
11743        Generates a streaming response for the provided synapse.
11744
11745        This function serves as the main entry point for handling streaming prompts. It takes
11746        the incoming synapse which contains messages to be processed and returns a streaming
11747        response. The function uses the GPT-2 tokenizer and a simulated model to tokenize and decode
11748        the incoming message, and then sends the response back to the client token by token.
11749
11750        Args:
11751            synapse (StreamPrompting): The incoming StreamPrompting instance containing the messages to be processed.
11752
11753        Returns:
11754            StreamPrompting: The streaming response object which can be used by other functions to
11755                            stream back the response to the client.
11756
11757        Usage:
11758            This function can be extended and customized based on specific requirements of the
11759            miner. Developers can swap out the tokenizer, model, or adjust how streaming responses
11760            are generated to suit their specific applications.
11761        """
11762        bt.logging.trace("In outer PROMPT()")
11763        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
11764
11765        # Simulated function to decode token IDs into strings. In a real-world scenario,
11766        # this can be replaced with an actual model inference step.
11767        def model(ids):
11768            return (tokenizer.decode(id) for id in ids)
11769
11770        async def _prompt(text: str, send: Send):
11771            """
11772            Asynchronously processes the input text and sends back tokens as a streaming response.
11773
11774            This function takes an input text, tokenizes it using the GPT-2 tokenizer, and then
11775            uses the simulated model to decode token IDs into strings. It then sends each token
11776            back to the client as a streaming response, with a delay between tokens to simulate
11777            the effect of real-time streaming.
11778
11779            Args:
11780                text (str): The input text message to be processed.
11781                send (Send): An asynchronous function that allows sending back the streaming response.
11782
11783            Usage:
11784                This function can be adjusted based on the streaming requirements, speed of
11785                response, or the model being used. Developers can also introduce more sophisticated
11786                processing steps or modify how tokens are sent back to the client.
11787            """
11788            bt.logging.trace("In inner _PROMPT()")
11789            input_ids = tokenizer(text, return_tensors="pt").input_ids.squeeze()
11790            buffer = []
11791            bt.logging.debug(f"Input text: {text}")
11792            bt.logging.debug(f"Input ids: {input_ids}")
11793             
11794            N = 3  # Number of tokens to send back to the client at a time
11795            for token in model(input_ids):
11796                bt.logging.trace(f"appending token: {token}")
11797                buffer.append(token)
11798                # If buffer has N tokens, send them back to the client.
11799                if len(buffer) == N:
11800                    time.sleep(0.1)
11801                    joined_buffer = "".join(buffer)
11802                    bt.logging.debug(f"sedning tokens: {joined_buffer}")
11803                    await send(
11804                        {
11805                            "type": "http.response.body",
11806                            "body": joined_buffer.encode("utf-8"),
11807                            "more_body": True,
11808                        }
11809                    )
11810                    bt.logging.debug(f"Streamed tokens: {joined_buffer}")
11811                    buffer = []  # Clear the buffer for next batch of tokens
11812
11813            # Send any remaining tokens in the buffer
11814            if buffer:
11815                joined_buffer = "".join(buffer)
11816                await send(
11817                    {
11818                        "type": "http.response.body",
11819                        "body": joined_buffer.encode("utf-8"),
11820                        "more_body": False,  # No more tokens to send
11821                    }
11822                )
11823                bt.logging.trace(f"Streamed tokens: {joined_buffer}")
11824
11825        message = synapse.messages[0]
11826        bt.logging.trace(f"message in _prompt: {message}")
11827        token_streamer = partial(_prompt, message)
11828        bt.logging.trace(f"token streamer: {token_streamer}")
11829        return synapse.create_streaming_response(token_streamer)
11830```
11831
11832### Writing the client
11833Excellent! Now that we have defined our server, we can define our client.
11834
11835This assumes that you:
118361. Have registered your miner on the chain (`finney`/`test`)
118372. Are serving your miner on an open port (e.g. `12345`)
11838
11839Steps:
11840- Instantiate your synapse subclass with the relevant information. E.g. `messages`, `roles`, etc.
11841- Instantiate your wallet and a dendrite client
11842- Query the dendrite client with your synapse object
11843- Iterate over the async generator to extract the yielded tokens on the server side
11844
11845```python
11846
11847# Import bittensor
11848import bittensor as bt
11849
11850# Create your streaming synapse subclass object to house the request body
11851syn = MyStreamingSynapse(
11852    roles=["user"],
11853    messages=["hello this is a test of a streaming response. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."]
11854)
11855
11856# Create a wallet instance that must be registered on the network
11857wallet = bt.wallet(name="default", hotkey="default")
11858
11859# Instantiate the metagraph
11860metagraph = bt.metagraph(
11861    netuid=8, network="test", sync=True, lite=False
11862)
11863
11864# Grab the axon you're serving
11865my_uid = 1
11866axon = metagraph.axons[my_uid]
11867
11868# Create a Dendrite instance to handle client-side communication.
11869dendrite = bt.dendrite(wallet=wallet)
11870
11871
11872# This is an async function so we can use the `await` keyword when querying the server with the dendrite object.
11873async def main():
11874    # Send a request to the Axon using the Dendrite, passing in a StreamPrompting 
11875    # instance with roles and messages. The response is awaited, as the Dendrite 
11876    # communicates asynchronously with the Axon. Returns a list of async generator.
11877    responses = await dendrite(
11878        [axon],
11879        syn,
11880        deserialize=False,
11881        streaming=True
11882    )
11883
11884    # Now that we have our responses we want to iterate over the yielded tokens
11885    # iterate over the async generator to extract the yielded tokens on server side
11886    for resp in responses:
11887        i=0
11888        async for chunk in resp:
11889            i += 1
11890            if i % 5 == 0:
11891                print()
11892            if isinstance(chunk, list):
11893                print(chunk[0], end="", flush=True)
11894            else:
11895                # last object yielded is the synapse itself with completion filled
11896                synapse = chunk
11897        break
11898
11899    # The synapse object contains the completion attribute which contains the
11900    # accumulated tokens from the streaming response.
11901
11902if __name__ == "__main__":
11903    # Run the main function with asyncio
11904    asyncio.run(main())
11905    
11906```
11907There you have it!
11908
11909### Complete example
11910If you would like to see a complete standalone example that only depends on bittensor>=6.2.0, look below:
11911
11912- client.py
11913- streaming_miner.py
11914- 
11915
11916# client.py
11917```python
11918# Import bittensor and the text-prompting packages
11919import bittensor as bt
11920import prompting
11921
11922# Create a StreamPrompting synapse object to house the request body
11923syn = prompting.protocol.StreamPrompting(
11924    roles=["user"], 
11925    messages=["hello this is a test of a streaming response. Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."])
11926syn
11927
11928# create a wallet instance that must be registered on the network
11929wallet = bt.wallet(name="default", hotkey="default")
11930wallet
11931
11932# instantiate the metagraph
11933metagraph = bt.metagraph(
11934    netuid=8, network="test", sync=True, lite=False
11935)
11936metagraph
11937
11938# Grab the axon you're serving
11939axon = metagraph.axons[62]
11940axon
11941
11942# Create a Dendrite instance to handle client-side communication.
11943d = bt.dendrite(wallet=wallet)
11944d
11945
11946
11947async def main():
11948        
11949    # Send a request to the Axon using the Dendrite, passing in a StreamPrompting 
11950    # instance with roles and messages. The response is awaited, as the Dendrite 
11951    # communicates asynchronously with the Axon. Returns a list of async generator.
11952    responses = await d(
11953        [axon],
11954        syn,
11955        deserialize=False,
11956        streaming=True
11957    )
11958    responses 
11959
11960    # iterate over the async generator to extract the yielded tokens on server side
11961    for resp in responses:
11962        i=0
11963        async for chunk in resp:
11964            i += 1
11965            if i % 5 == 0:
11966                print()
11967            if isinstance(chunk, list):
11968                print(chunk[0], end="", flush=True)
11969            else:
11970                # last object yielded is the synapse itself with completion filled
11971                synapse = chunk
11972        break
11973
11974if __name__ == "__main__":
11975    import asyncio
11976    asyncio.run(main())
11977```
11978
11979
11980
11981---
11982File: /docs/validators/quickstart.md
11983---
11984
11985# Quickstart
11986
11987
11988## Dependencies
11989
11990You must have the following things:
11991
11992- System with at least 12gb of VRAM
11993- Python >=3.10
11994- OpenAI API key
11995- Anthropic API Key
11996- Google Gemini API Key
11997- Github Token
11998- Wandb account
11999
12000## Getting started
12001
12002
12003## Installation
12004
12005This repository requires python3.11, follow the commands below to install it if you do not already have it.
12006
12007ONLY RUN THE FOLLOWING COMMANDS IF YOU DO NOT HAVE PYTHON INSTALLED
12008```bash
12009sudo add-apt-repository ppa:deadsnakes/ppa
12010sudo apt update
12011sudo apt install python3.11 python3.11-venv
12012```
12013
12014Ensure that your python version is 3.11 before continuing:
12015```bash
12016python3 --version
12017```
12018
12019If the above doesn't report Python 3.11, try using the command `python3.11` instead. If the command `python3.11` works, use it in place of every `python3` command below.
12020
12021YOU WILL GET SOME ERRORS ABOUT THE PYTHON VERSION, IGNORE THEM.
12022
12023After ensuring you have Python installed, run the following commands:
12024```bash
12025git clone https://github.com/brokespace/code
12026cd code
12027python3 -m venv .venv
12028source .venv/bin/activate
12029python3 -m pip install --use-deprecated=legacy-resolver -r requirements.txt
12030python3 -m pip install --use-deprecated=legacy-resolver -e .
12031python3 -m pip uninstall uvloop # b/c it causes issues with threading/loops
12032```
12033
12034
12035#### Setup your dotenv
12036
12037Copy `.env.example` to `.env` (`cp .env.example .env`), then edit the `.env` file to add the GitHub token you obtain below.
12038
12039#### Get a Github Token
12040
12041We require a GitHub token. To get one, follow the instructions [here](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens), or the steps below.
12042
120431. Go to [Github](http://Github.com)
120442. Open the top right menu and select `Settings`
120453. Go to the bottom left and select `Developer Settings`
120464. Go to either `Tokens (classic)` or `Fine-grained tokens`
120475. Generate a new token and place it in the .env
12048
12049#### Get an OpenAI Key
12050
12051To use OpenAI's services, you need to obtain an API key. Follow the steps below to get your OpenAI API key:
12052
120531. Go to the [OpenAI website](https://www.openai.com/).
120542. Sign up for an account if you don't already have one, or log in if you do.
120553. Navigate to the API section of your account.
120564. Generate a new API key.
120575. Copy the API key and store it in a secure location.
12058
12059Once you have your OpenAI API key, add it to your `.env` file like this:
12060
12061```
12062OPENAI_API_KEY=<your openai api key>
12063```
12064
12065#### Get a Claude API Key
12066
12067Place the api key in the .env file like this:
12068
12069```
12070ANTHROPIC_API_KEY=<your anthropic api key>
12071```
12072
12073#### Get a Gemini API Key
12074
12075Place the api key in the .env file like this:
12076
12077```
12078GOOGLE_API_KEY=<your gemini api key>
12079```
12080
12081
12082#### Setup Docker Server
12083
12084Setup the docker server to host the miner submissions.
12085
12086[Docker Server Quickstart](./swe.md)
12087
12088#### Setup LLM Server
12089
12090Start the server:
12091
12092```bash
12093source .venv/bin/activate
12094cd coding/finetune/llm
12095pm2 start --name llm-server.25000 "gunicorn app:app --workers 5 --worker-class uvicorn.workers.UvicornWorker --bind 0.0.0.0:25000 --timeout 800"
12096```
12097
12098Ensure that port 25000 is open on your machine and accessible from the Docker server.
12099
12100Ensure that ufw is enabled on your machine. After doing so, you can restrict the port so that it is only accessible from the Docker server by running the following commands:
12101
12102```bash 
12103sudo ufw allow from <docker-server-ip> to any port 25000
12104sudo ufw deny 25000
12105sudo ufw reload
12106```
12107
12108
12109Test that the port is open by running the following command from the docker server:
12110
12111```bash
12112curl <validator-ip>:25000
12113```
12114
12115The command should return the response: `{"detail":"Not Found"}`. If it does not, then the port is not open or not accessible from the Docker server.
12116
12117#### Setup IP Addresses
12118
12119Setup the IP addresses in the .env file like this:
12120
12121```
12122DOCKER_HOST_IP=<docker-server-ip>
12123HOST_IP=<validator-server-ip>
12124DOCKER_HOST=tcp://<docker-server-ip>:2375
12125```
12126
12127#### Setup LLM Auth Key
12128
12129Setup the LLM auth key in the .env file like this:
12130
12131```
12132LLM_AUTH_KEY=<random auth key>
12133```
12134
12135#### Start the validator
12136
12137
12138
12139```bash
12140source .venv/bin/activate
12141python3 scripts/start_validator.py
12142    --netuid 45
12143    --subtensor.network <finney/local/test>
12144    --neuron.device cuda
12145    --wallet.name <your wallet> # Must be created using the bittensor-cli
12146    --wallet.hotkey <your hotkey> # Must be created using the bittensor-cli
12147    --logging.debug # Run in debug mode, alternatively --logging.trace for trace mode
12148    --axon.port # VERY IMPORTANT: set the port to be one of the open TCP ports on your machine
12149    --wandb.on True # default is true but you can disable
12150```
12151
12152
12153
12154
12155
12156---
12157File: /docs/validators/swe.md
12158---
12159
12160# SWE Start
12161
12162
12163## Remote Server Setup
12164
12165You should use a separate server from the one you run the validator on for this. This is to ensure security and avoid any potential issues. I recommend using a DigitalOcean droplet. A small one is fine, maybe 2-4 GB of RAM.
12166
12167### Setup Docker
12168
12169Install docker: https://docs.docker.com/engine/install/ubuntu/
12170
12171Next, set up remote access to the Docker daemon (see https://docs.docker.com/engine/daemon/remote-access/#configuring-remote-access-with-daemonjson) listening on 0.0.0.0:2375. Do so by running the following commands:
12172
12173```bash
12174sudo systemctl edit docker.service
12175```
12176
12177Add the following to the file at the line where it opens:
12178```bash
12179[Service]
12180ExecStart=
12181ExecStart=/usr/bin/dockerd -H fd:// -H tcp://0.0.0.0:2375
12182```
12183
12184```bash
12185sudo systemctl daemon-reload
12186sudo systemctl restart docker.service
12187```
12188
12189### Get Base Image
12190
12191```bash
12192docker pull brokespace/swe-server:latest
12193```
12194
12195### Configure UFW
12196
12197```bash
12198sudo ufw disable
12199```
12200
12201
12202### IPTables 
12203```bash
12204sudo apt-get install iptables-persistent
12205```
12206
12207The order of the rules is important. Run the following commands to setup the rules:
12208
12209
12210To let Docker manage the iptables rules, update the file `/etc/docker/daemon.json` with the following content:
12211```bash
12212{
12213  "iptables": true
12214}
12215```
12216```bash
12217sudo apt install ipset
12218```
12219
12220Create a file in `/etc/cron.monthly/dockerio` with the following content:
12221
12222MAKE SURE YOU SET THE IP OF THE SERVER YOU ARE RUNNING THE VALIDATOR ON IN THE IPTABLES RULES BELOW.
12223
12224```bash
12225#!/bin/bash
12226sudo iptables -F
12227sudo iptables -t nat -F
12228sudo iptables -t mangle -F
12229sudo iptables -t raw -F
12230
12231# Define the IP set name
12232IPSET_NAME="dockerio"
12233
12234# Check if the IP set exists; create it if it doesn't
12235if ! ipset list $IPSET_NAME &>/dev/null; then
12236    sudo ipset create $IPSET_NAME hash:ip
12237fi
12238
12239# Clear existing IPs in the set
12240sudo ipset flush $IPSET_NAME
12241
12242# Resolve required domains and add to ipset
12243for domain in registry-1.docker.io auth.docker.io cdn.docker.io; do
12244    for ip in $(dig +short $domain); do
12245        sudo ipset add $IPSET_NAME $ip
12246    done
12247done
12248
12249# Add iptables rules for the IP set
12250sudo iptables -A OUTPUT -m set --match-set $IPSET_NAME dst -p tcp --dport 443 -j ACCEPT
12251sudo iptables -A OUTPUT -m set --match-set $IPSET_NAME dst -p tcp --dport 80 -j ACCEPT
12252
12253# Restart Docker to apply changes
12254sudo systemctl restart docker
12255
12256sudo iptables -N DOCKER-USER
12257sudo iptables -A DOCKER-USER -p tcp --dport 3000 -j ACCEPT
12258sudo iptables -I DOCKER-USER 1 -p tcp --dport 3000 -j ACCEPT
12259sudo iptables -I DOCKER-USER 1 -p tcp --dport 25000 -j ACCEPT
12260# Allow forwarding from your host interface to the Docker bridge
12261sudo iptables -A FORWARD -p tcp -d 172.17.0.0/16 --dport 3000 -j ACCEPT
12262sudo iptables -A FORWARD -p tcp -s 172.17.0.0/16 --sport 3000 -j ACCEPT
12263sudo iptables -A INPUT -p tcp -s <ip-of-server-you-are-running-the-validator-on> --dport 2375 -j ACCEPT
12264sudo iptables -A OUTPUT -p tcp -s <ip-of-server-you-are-running-the-validator-on> --dport 2375 -j ACCEPT
12265sudo iptables -A INPUT -p tcp --dport 2375 -j DROP
12266sudo iptables -I OUTPUT 1 -p tcp --dport 25000 -j ACCEPT
12267sudo iptables -A INPUT -p tcp --sport 25000 -j ACCEPT
12268sudo iptables -A OUTPUT -p tcp --sport 25000 -j ACCEPT
12269
12270sudo iptables -A OUTPUT -p udp --dport 53 -j ACCEPT
12271sudo iptables -A INPUT -p udp --sport 53 -j ACCEPT
12272
12273sudo iptables -A INPUT -p tcp --sport 443 -j ACCEPT
12274sudo iptables -I OUTPUT 1 -p tcp --dport 3000 -j ACCEPT
12275sudo iptables -A OUTPUT -p tcp --dport 3000 -j ACCEPT
12276sudo iptables -I INPUT 1 -p tcp --dport 3000 -j ACCEPT
12277sudo iptables -A INPUT -p tcp --dport 3000 -j ACCEPT
12278
12279sudo iptables -A INPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
12280sudo iptables -A OUTPUT -m conntrack --ctstate ESTABLISHED,RELATED -j ACCEPT
12281
12282
12283# Allow outgoing SSH traffic (port 22)
12284sudo iptables -A OUTPUT -p tcp --dport 22 -j ACCEPT
12285
12286
12287# sudo iptables -I OUTPUT 1 -p tcp --dport 25000 -j ACCEPT
12288
12289
12290# Allow incoming SSH traffic (port 22)
12291sudo iptables -A INPUT -p tcp --sport 22 -j ACCEPT
12292sudo iptables -A OUTPUT -j DROP
12293sudo iptables -A DOCKER-USER -j DROP
12294sudo iptables -A INPUT -p tcp --dport 2375 -j DROP
12295
12296sudo iptables-save | sudo tee /etc/iptables/rules.v4
12297sudo systemctl restart docker
12298
12299```
12300
12301Ensure the file is executable:
12302```bash
12303sudo chmod +x /etc/cron.monthly/dockerio
12304```
12305
12306Run it now:
12307
12308```bash
12309sudo /etc/cron.monthly/dockerio
12310```
12311
12312
12313
12314## Testing Docker Remote Access
12315
12316From the server you are running the validator on - NOT THE ONE YOU RAN THE ABOVE COMMANDS ON - run the following command:
12317
12318```bash
12319curl <docker-server-ip>:2375
12320```
12321
12322It should return `{"message":"page not found"}`.
12323
12324Next, to test further, run the following from the validator server:
12325
12326```bash
12327DOCKER_HOST=tcp://<docker-server-ip>:2375 docker run --rm brokespace/swe-server:latest bash -c "sleep 600"
12328```
12329
12330While that command is running you should be able to go onto the docker server and see the container running with the following command:
12331
12332```bash
12333docker ps
12334```
12335
12336
12337
12338
12339---
12340File: /docs/api_deployment.md
12341---
12342
12343# Deploying the API for use
12344
12345Run the following command:
12346
12347```bash
12348python3 -m coding.api.openai --wallet <COLDKEY NAME> --hotkey <HOTKEY NAME> --network <NETWORK RUNNING ON> --netuid <UID OF THE NETWORK>
12349```
12350
12351For example, using the default network and netuid I could start it like so:
12352
12353```bash
12354python3 -m coding.api.openai --wallet coldkey --hotkey hotkey
12355```
12356
12357
12358
12359
12360---
12361File: /docs/FAQ.md
12362---
12363
12364# FAQ
12365
12366
12367## How do I determine how good my miner is?
12368
12369Check wandb - https://wandb.ai/gen42/gen42. Complete the miner-average-score.ipynb notebook in /notebooks.
12370
12371## How do I know if my miner is working?
12372
12373Ensure you can curl it: `curl <miner-ip>:<miner-port>`.
12374
12375Ensure that you are seeing logs like "Received query" in your pm2 logs.
12376
12377Ensure that there are no errors in the logs; warnings are fine.
12378
12379## What are these pydantic errors?
12380
12381Just ignore them.
12382
12383## How can I remove debug logging?
12384
12385Edit `coding/utils/config.py` and remove line 301 `bt.debug()`.
12386
12387## How can I disable trace logging?
12388
12389Edit `coding/utils/config.py` and remove line 300 `bt.trace()`.
12390
12391## How is scoring done?
12392
12393The scoring depends on the task, however primarily it is done in the following route:
12394
123951. Get code from The Stack
123962. Rewrite the code with an LLM to ensure that lookups are not possible
123973. Grab a chunk from that code and erase it
123984. Provide the remaining code to the miner
123995. Compare the chunk to the miner's response using Cosine Similarity with CodeBERT
124006. Return the score
12401
12402
12403
12404---
12405File: /docs/running_on_mainnet.md
12406---
12407
12408# Running Subnet on Mainnet
12409
12410This tutorial shows how to use the bittensor `btcli` to create a subnetwork and connect your incentive mechanism to it. 
12411
12412**IMPORTANT:** Before attempting to register on mainnet, we strongly recommend that you:
12413- First run [Running Subnet Locally](running_on_staging.md), and
12414- Then run [Running on the Testnet](running_on_testnet.md).
12415
12416Your incentive mechanisms running on the mainnet are open to anyone. They emit real TAO. Creating these mechanisms incurs a `lock_cost` in TAO.
12417
12418**DANGER**
12419- Do not expose your private keys.
12420- Only use your testnet wallet.
12421- Do not reuse the password of your mainnet wallet.
12422- Make sure your incentive mechanism is resistant to abuse. 
12423
12424## Prerequisites
12425
12426Before proceeding further, make sure that you have installed Bittensor. See the below instructions:
12427
12428- [Install `bittensor`](https://github.com/opentensor/bittensor#install).
12429
12430After installing `bittensor`, proceed as below:
12431
12432## Steps
12433
12434## 1. Install your subnet template
12435
12436**NOTE: Skip this step if** you already did this during local testing and development.
12437
12438In your project directory:
12439
12440```bash
12441git clone https://github.com/opentensor/bittensor-subnet-template.git 
12442```
12443
12444Next, `cd` into `bittensor-subnet-template` repo directory:
12445
12446```bash
12447cd bittensor-subnet-template
12448```
12449
12450Install the Bittensor subnet template package:
12451
12452```bash
12453python -m pip install -e . # Install your subnet template package
12454```
12455
12456## 2. Create wallets 
12457
12458Create wallets for subnet owner, subnet validator and for subnet miner.
12459  
12460This step creates local coldkey and hotkey pairs for your three identities: subnet owner, subnet validator and subnet miner. 
12461
12462The owner will create and control the subnet. The owner must have at least 100  TAO before the owner can run next steps. 
12463
12464The validator and miner will be registered to the subnet created by the owner. This ensures that the validator and miner can run the respective validator and miner scripts.
12465
12466**NOTE**: You can also use existing wallets to register. Creating new keys is shown here for reference.
12467
12468Create a coldkey for the owner wallet:
12469
12470```bash
12471btcli wallet new_coldkey --wallet.name owner
12472```
12473
12474Create a coldkey and hotkey for the subnet miner wallet:
12475```bash
12476btcli wallet new_coldkey --wallet.name miner
12477```
12478
12479and
12480
12481```bash
12482btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default
12483```
12484
12485Create a coldkey and hotkey for the subnet validator wallet:
12486
12487```bash
12488btcli wallet new_coldkey --wallet.name validator
12489```
12490
12491and
12492
12493```bash
12494btcli wallet new_hotkey --wallet.name validator --wallet.hotkey default
12495```
12496
12497## 3. Getting the price of subnet creation
12498
12499Creating subnets on mainnet is competitive. The cost is determined by the rate at which new subnets are being registered onto the Bittensor blockchain. 
12500
12501By default you must have at least 100 TAO on your owner wallet to create a subnet. However, the exact amount will fluctuate based on demand. The below code shows how to get the current price of creating a subnet.
12502
12503```bash
12504btcli subnet lock_cost 
12505```
12506
12507The above command will show:
12508
12509```bash
12510>> Subnet lock cost: τ100.000000000
12511```
12512
12513## 4. Purchasing a slot
12514
12515Using your TAO balance, you can register your subnet to the mainchain. This will create a new subnet on the mainchain and give you the owner permissions to it. The below command shows how to purchase a slot. 
12516
12517**NOTE**: Slots cost TAO to lock. You will get this TAO back when the subnet is deregistered.
12518
12519```bash
12520btcli subnet create  
12521```
12522
12523Enter the owner wallet name. This gives permissions to the coldkey.
12524
12525```bash
12526>> Enter wallet name (default): owner # Enter your owner wallet name
12527>> Enter password to unlock key: # Enter your wallet password.
12528>> Register subnet? [y/n]: <y/n> # Select yes (y)
12529>> ⠇ 📡 Registering subnet...
12530✅ Registered subnetwork with netuid: 1 # Your subnet netuid will show here, save this for later.
12531```
12532
12533## 5. (Optional) Register keys 
12534
12535**NOTE**: While this is not enforced, we recommend subnet owners to run a subnet validator and a subnet miner on the subnet to demonstrate proper use to the community.
12536
12537This step registers your subnet validator and subnet miner keys to the subnet giving them the **first two slots** on the subnet.
12538
12539Register your miner key to the subnet:
12540
12541```bash
12542btcli subnet recycle_register --netuid 45 --subtensor.network finney --wallet.name miner --wallet.hotkey default
12543```
12544
12545Follow the below prompts:
12546
12547```bash
12548>> Enter netuid [45] (45): # Enter netuid 1 to specify the subnet you just created.
12549>> Continue Registration?
12550  hotkey:     ...
12551  coldkey:    ...
12552  network:    finney [y/n]: # Select yes (y)
12553>> ✅ Registered
12554```
12555
12556Next, register your validator key to the subnet:
12557
12558```bash
12559btcli subnet recycle_register --netuid 45 --subtensor.network finney --wallet.name validator --wallet.hotkey default
12560```
12561
12562Follow the below prompts:
12563
12564```bash
12565>> Enter netuid [45] (45): # Enter netuid 1 to specify the subnet you just created.
12566>> Continue Registration?
12567  hotkey:     ...
12568  coldkey:    ...
12569  network:    finney [y/n]: # Select yes (y)
12570>> ✅ Registered
12571```
12572
12573## 6. Check that your keys have been registered
12574
12575Check that your subnet validator key has been registered:
12576
12577```bash
12578btcli wallet overview --wallet.name validator 
12579```
12580
12581The output will be similar to the below:
12582
12583```bash
12584Subnet: 1                                                                                                                                                                
12585COLDKEY  HOTKEY   UID  ACTIVE  STAKE(τ)     RANK    TRUST  CONSENSUS  INCENTIVE  DIVIDENDS  EMISSION(ρ)   VTRUST  VPERMIT  UPDATED  AXON  HOTKEY_SS58                    
12586miner    default  0      True   0.00000  0.00000  0.00000    0.00000    0.00000    0.00000            0  0.00000                14  none  5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
125871        1        2            τ0.00000  0.00000  0.00000    0.00000    0.00000    0.00000           ρ0  0.00000                                                         
12588                                                                          Wallet balance: τ0.0         
12589```
12590
12591Check that your subnet miner has been registered:
12592
12593```bash
12594btcli wallet overview --wallet.name miner 
12595```
12596
12597The output will be similar to the below:
12598
12599```bash
12600Subnet: 1                                                                                                                                                                
12601COLDKEY  HOTKEY   UID  ACTIVE  STAKE(τ)     RANK    TRUST  CONSENSUS  INCENTIVE  DIVIDENDS  EMISSION(ρ)   VTRUST  VPERMIT  UPDATED  AXON  HOTKEY_SS58                    
12602miner    default  1      True   0.00000  0.00000  0.00000    0.00000    0.00000    0.00000            0  0.00000                14  none  5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
126031        1        2            τ0.00000  0.00000  0.00000    0.00000    0.00000    0.00000           ρ0  0.00000                                                         
12604                                                                          Wallet balance: τ0.0   
12605```
12606
12607## 7. Run subnet miner and subnet validator
12608
12609Run the subnet miner:
12610
12611```bash
12612python neurons/miner.py --netuid 45  --wallet.name miner --wallet.hotkey default --logging.debug
12613```
12614
12615You will see the below terminal output:
12616
12617```bash
12618>> 2023-08-08 16:58:11.223 |       INFO       | Running miner for subnet: 45 on network: wss://entrypoint-finney.opentensor.ai:443 with config: ...
12619```
12620
12621Run the subnet validator:
12622
12623```bash
12624python neurons/validator.py --netuid 45  --wallet.name validator --wallet.hotkey default --logging.debug
12625```
12626
12627You will see the below terminal output:
12628
12629```bash
12630>> 2023-08-08 16:58:11.223 |       INFO       | Running validator for subnet: 45 on network: wss://entrypoint-finney.opentensor.ai:443 with config: ...
12631```
12632
12633## 8. Get emissions flowing
12634
12635Register to the root subnet using the `btcli`:
12636
12637```bash
12638btcli root register 
12639```
12640
12641Then set your weights for the subnet:
12642
12643```bash
12644btcli root weights 
12645```
12646
12647## 9. Stopping your nodes
12648
12649To stop your nodes, press CTRL + C in the terminal where the nodes are running.
12650
12651---
12652
12653
12654---
12655File: /docs/running_on_staging.md
12656---
12657
12658# Running Subnet Locally
12659
12660This tutorial will guide you through:
12661
12662- Setting up a local blockchain that is not connected to either Bittensor testchain or mainchain
12663- Creating a subnet
12664- Running your incentive mechanism on the subnet.
12665
12666## Local blockchain vs local subtensor node 
12667
12668Running a local blockchain is sometimes referred to as running on staging. This is **different** from running a local subtensor node that connects to the Bittensor mainchain.
12669
12670A local subtensor node will connect to the mainchain and sync with the mainchain, giving you your own access point to the mainchain. 
12671
12672Running a local blockchain spins up two authority nodes locally, not connected to any other nodes or testchain or mainchain. This tutorial is for running a local blockchain. 
12673
12674## Prerequisites
12675
12676Before proceeding further, make sure that you have installed Bittensor. See the below instructions:
12677
12678- [Install `bittensor`](https://github.com/opentensor/bittensor#install).
12679
12680After installing `bittensor`, proceed as below:
12681
12682## 1. Install Substrate dependencies
12683
12684Begin by installing the required dependencies for running a Substrate node.
12685
12686Update your system packages:
12687
12688```bash
12689sudo apt update 
12690```
12691
12692Install additional required libraries and tools
12693
12694```bash
12695sudo apt install --assume-yes make build-essential git clang curl libssl-dev llvm libudev-dev protobuf-compiler
12696```
12697
12698## 2. Install Rust and Cargo
12699
12700Rust is the programming language used in Substrate development. Cargo is Rust's package manager.
12701
12702Install rust and cargo:
12703
12704```bash
12705curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
12706```
12707
12708Update your shell's source to include Cargo's path:
12709
12710```bash
12711source "$HOME/.cargo/env"
12712```
12713
12714## 3. Clone the subtensor repository
12715
12716This step fetches the subtensor codebase to your local machine.
12717
12718```bash
12719git clone https://github.com/opentensor/subtensor.git
12720```
12721
12722## 4. Setup Rust
12723
12724This step ensures that you have the nightly toolchain and the WebAssembly (wasm) compilation target. Note that this step will run the subtensor chain on your terminal directly, hence we advise that you run this as a background process using PM2 or other software.
12725
12726Update to the nightly version of Rust:
12727
12728```bash
12729./subtensor/scripts/init.sh
12730```
12731
12732## 5. Initialize 
12733
12734These steps initialize your local subtensor chain in development mode. These commands will set up and run a local subtensor.
12735
12736Build the binary with the faucet feature enabled:
12737
12738```bash
12739cargo build --release --features pow-faucet
12740```
12741
12742**NOTE**: The `--features pow-faucet` option in the above is required if we want to use the command `btcli wallet faucet` [See the below Mint tokens step](#8-mint-tokens-from-faucet).
12743
12744Next, run the localnet script and turn off the attempt to build the binary (as we have already done this above):
12745
12746```bash
12747BUILD_BINARY=0 ./scripts/localnet.sh 
12748```
12749
12750**NOTE**: Watch for any build or initialization outputs in this step. If you are building the project for the first time, this step will take a while to finish building, depending on your hardware.
12751
12752## 6. Install subnet template
12753
12754`cd` to your project directory and clone the bittensor subnet template repository:
12755
12756```bash
12757git clone https://github.com/opentensor/bittensor-subnet-template.git
12758```
12759
12760Navigate to the cloned repository:
12761
12762```bash
12763cd bittensor-subnet-template
12764```
12765
12766Install the bittensor-subnet-template Python package:
12767
12768```bash
12769python -m pip install -e .
12770```
12771
12772## 7. Set up wallets
12773
12774You will need wallets for the different roles, i.e., subnet owner, subnet validator and subnet miner, in the subnet. 
12775
12776- The owner wallet creates and controls the subnet. 
12777- The validator and miner will be registered to the subnet created by the owner. This ensures that the validator and miner can run the respective validator and miner scripts.
12778
12779Create a coldkey for the owner role:
12780
12781```bash
12782btcli wallet new_coldkey --wallet.name owner
12783```
12784
12785Set up the miner's wallets:
12786
12787```bash
12788btcli wallet new_coldkey --wallet.name miner
12789```
12790
12791```bash
12792btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default
12793```
12794
12795Set up the validator's wallets:
12796
12797```bash
12798btcli wallet new_coldkey --wallet.name validator
12799```
12800```bash
12801btcli wallet new_hotkey --wallet.name validator --wallet.hotkey default
12802```
12803
12804## 8. Mint tokens from faucet
12805
12806You will need tokens to initialize the incentive mechanism on the chain as well as for registering the subnet.
12807
12808Run the following commands to mint faucet tokens for the owner and for the validator.
12809
12810Mint faucet tokens for the owner:
12811
12812```bash
12813btcli wallet faucet --wallet.name owner --subtensor.chain_endpoint ws://127.0.0.1:9946 
12814```
12815
12816You will see:
12817
12818```bash
12819>> Balance: τ0.000000000 ➡ τ100.000000000
12820```
12821
12822Mint tokens for the validator:
12823
12824```bash
12825btcli wallet faucet --wallet.name validator --subtensor.chain_endpoint ws://127.0.0.1:9946 
12826```
12827
12828You will see:
12829
12830```bash
12831>> Balance: τ0.000000000 ➡ τ100.000000000
12832```
12833
12834## 9. Create a subnet
12835
12836The below commands establish a new subnet on the local chain. The cost will be exactly τ1000.000000000 for the first subnet you create and you'll have to run the faucet several times to get enough tokens.
12837
12838```bash
12839btcli subnet create --wallet.name owner --subtensor.chain_endpoint ws://127.0.0.1:9946 
12840```
12841
12842You will see:
12843
12844```bash
12845>> Your balance is: τ200.000000000
12846>> Do you want to register a subnet for τ1000.000000000? [y/n]: 
12847>> Enter password to unlock key: [YOUR_PASSWORD]
12848>> ✅ Registered subnetwork with netuid: 1
12849```
12850
12851**NOTE**: The local chain will now have a default `netuid` of 1. The second registration will create a `netuid` 2 and so on, until you reach the subnet limit of 8. If you register more than 8 subnets, then a subnet with the least staked TAO will be replaced by the 9th subnet you register.
12852
12853## 10. Register keys
12854
12855Register your subnet validator and subnet miner on the subnet. This gives your two keys unique slots on the subnet. The subnet has a current limit of 128 slots.
12856
12857Register the subnet miner:
12858
12859```bash
12860btcli subnet register --wallet.name miner --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946
12861```
12862
12863Follow the below prompts:
12864
12865```bash
12866>> Enter netuid [1] (1): 1
12867>> Continue Registration? [y/n]: y
12868>> ✅ Registered
12869```
12870
12871Register the subnet validator:
12872
12873```bash
12874
12875btcli subnet register --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946
12876```
12877
12878Follow the below prompts:
12879
12880```
12881>> Enter netuid [1] (1): 1
12882>> Continue Registration? [y/n]: y
12883>> ✅ Registered
12884```
12885
12886## 11. Add stake 
12887
12888This step bootstraps the incentives on your new subnet by adding stake into its incentive mechanism.
12889
12890```bash
12891btcli stake add --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946
12892```
12893
12894Follow the below prompts:
12895
12896```bash
12897>> Stake all Tao from account: 'validator'? [y/n]: y
12898>> Stake:
12899    τ0.000000000 ➡ τ100.000000000
12900```
12901
12902## 12. Validate key registrations
12903
12904Verify that both the miner and validator keys are successfully registered:
12905
12906```bash
12907btcli subnet list --subtensor.chain_endpoint ws://127.0.0.1:9946
12908```
12909
12910You will see the `2` entry under `NEURONS` column for the `NETUID` of 1, indicating that you have registered a validator and a miner in this subnet:
12911
12912```bash
12913NETUID  NEURONS  MAX_N   DIFFICULTY  TEMPO  CON_REQ  EMISSION  BURN(τ)  
12914   1        2     256.00   10.00 M    1000    None     0.00%    τ1.00000 
12915   2      128    
12916```
12917
12918See the subnet validator's registered details:
12919
12920```bash
12921btcli wallet overview --wallet.name validator --subtensor.chain_endpoint ws://127.0.0.1:9946
12922```
12923
12924You will see:
12925
12926```
12927Subnet: 1                                                                                                                                                                
12928COLDKEY  HOTKEY   UID  ACTIVE  STAKE(τ)     RANK    TRUST  CONSENSUS  INCENTIVE  DIVIDENDS  EMISSION(ρ)   VTRUST  VPERMIT  UPDATED  AXON  HOTKEY_SS58                    
12929miner    default  0      True   100.00000  0.00000  0.00000    0.00000    0.00000    0.00000            0  0.00000                14  none  5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
129301        1        2            τ100.00000  0.00000  0.00000    0.00000    0.00000    0.00000           ρ0  0.00000                                                         
12931                                                                          Wallet balance: τ0.0         
12932```
12933
12934See the subnet miner's registered details:
12935
12936```bash
12937btcli wallet overview --wallet.name miner --subtensor.chain_endpoint ws://127.0.0.1:9946
12938```
12939
12940You will see:
12941
12942```bash
12943Subnet: 1                                                                                                                                                                
12944COLDKEY  HOTKEY   UID  ACTIVE  STAKE(τ)     RANK    TRUST  CONSENSUS  INCENTIVE  DIVIDENDS  EMISSION(ρ)   VTRUST  VPERMIT  UPDATED  AXON  HOTKEY_SS58                    
12945miner    default  1      True   0.00000  0.00000  0.00000    0.00000    0.00000    0.00000            0  0.00000                14  none  5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
129461        1        2            τ0.00000  0.00000  0.00000    0.00000    0.00000    0.00000           ρ0  0.00000                                                         
12947                                                                          Wallet balance: τ0.0   
12948
12949```
12950
12951## 13. Run subnet miner and subnet validator
12952
12953Run the subnet miner and subnet validator. Make sure to specify your subnet parameters.
12954
12955Run the subnet miner:
12956
12957```bash
12958python neurons/miner.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name miner --wallet.hotkey default --logging.debug
12959```
12960
12961Run the subnet validator:
12962
12963```bash
12964python neurons/validator.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name validator --wallet.hotkey default --logging.debug
12965```
12966
12967## 14. Set weights for your subnet
12968
12969Register a validator on the root subnet and boost your subnet to set its weights. This step is required for the subnet to receive emissions.
12970
12971### Register your validator on the root subnet
12972
12973```bash
12974btcli root register --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946
12975```
12976
12977### Boost your subnet on the root subnet
12978```bash
12979btcli root boost --netuid 1 --increase 1 --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946
12980```
12981
12982## 15. Verify your incentive mechanism
12983
12984After a few blocks the subnet validator will set weights. This indicates that the incentive mechanism is active. Then after a subnet tempo elapses (360 blocks or 72 minutes) you will see your incentive mechanism beginning to distribute TAO to the subnet miner.
12985
12986```bash
12987btcli wallet overview --wallet.name miner --subtensor.chain_endpoint ws://127.0.0.1:9946
12988```
12989
12990## Ending your session
12991
12992To halt your nodes:
12993```bash
12994# Press CTRL + C keys in the terminal.
12995```
12996
12997---
12998
12999
13000
13001---
13002File: /docs/running_on_testnet.md
13003---
13004
13005# Running Subnet on Testnet
13006
13007This tutorial shows how to use the Bittensor testnet to create a subnet and run your incentive mechanism on it. 
13008
13009**IMPORTANT:** We strongly recommend that you first run [Running Subnet Locally](running_on_staging.md) before running on the testnet. Incentive mechanisms running on the testnet are open to anyone, and although these mechanisms on testnet do not emit real TAO, they cost you test TAO which you must create. 
13010
13011**DANGER**
13012- Do not expose your private keys.
13013- Only use your testnet wallet.
13014- Do not reuse the password of your mainnet wallet.
13015- Make sure your incentive mechanism is resistant to abuse. 
13016
13017## Prerequisites
13018
13019Before proceeding further, make sure that you have installed Bittensor. See the below instructions:
13020
13021- [Install `bittensor`](https://github.com/opentensor/bittensor#install).
13022
13023After installing `bittensor`, proceed as below:
13024
13025## 1. Install Bittensor subnet template
13026
13027**NOTE: Skip this step if** you already did this during local testing and development.
13028
13029`cd` into your project directory and clone the bittensor-subnet-template repo:
13030
13031```bash
13032git clone https://github.com/opentensor/bittensor-subnet-template.git 
13033```
13034
13035Next, `cd` into bittensor-subnet-template repo directory:
13036
13037```bash
13038cd bittensor-subnet-template # Enter the repo directory
13039```
13040
13041Install the bittensor-subnet-template package:
13042
13043```bash
13044python -m pip install -e . 
13045```
13046
13047## 2. Create wallets 
13048
13049Create wallets for subnet owner, subnet validator and for subnet miner.
13050  
13051This step creates local coldkey and hotkey pairs for your three identities: subnet owner, subnet validator and subnet miner. 
13052
13053The owner will create and control the subnet. The owner must have at least 100 testnet TAO before the owner can run next steps. 
13054
13055The validator and miner will be registered to the subnet created by the owner. This ensures that the validator and miner can run the respective validator and miner scripts.
13056
13057Create a coldkey for your owner wallet:
13058
13059```bash
13060btcli wallet new_coldkey --wallet.name owner
13061```
13062
13063Create a coldkey and hotkey for your miner wallet:
13064
13065```bash
13066btcli wallet new_coldkey --wallet.name miner
13067```
13068
13069and
13070
13071```bash
13072btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default
13073```
13074
13075Create a coldkey and hotkey for your validator wallet:
13076
13077```bash
13078btcli wallet new_coldkey --wallet.name validator
13079```
13080
13081and
13082
13083```bash
13084btcli wallet new_hotkey --wallet.name validator --wallet.hotkey default
13085```
13086
13087## 3. Get the price of subnet creation
13088
13089Creating subnets on the testnet is competitive. The cost is determined by the rate at which new subnets are being registered onto the chain. 
13090
13091By default you must have at least 100 testnet TAO in your owner wallet to create a subnet. However, the exact amount will fluctuate based on demand. The below command shows how to get the current price of creating a subnet.
13092
13093```bash
13094btcli subnet lock_cost --subtensor.network test
13095```
13096
13097The above command will show:
13098
13099```bash
13100>> Subnet lock cost: τ100.000000000
13101```
13102
13103## 4. (Optional) Get faucet tokens
13104   
13105Faucet is disabled on the testnet. Hence, if you don't have sufficient faucet tokens, ask the [Bittensor Discord community](https://discord.com/channels/799672011265015819/830068283314929684) for faucet tokens.
13106
13107## 5. Purchase a slot
13108
13109Using the test TAO from the previous step you can register your subnet on the testnet. This will create a new subnet on the testnet and give you the owner permissions to it. 
13110
13111The below command shows how to purchase a slot. 
13112
13113**NOTE**: Slots cost TAO to lock. You will get this TAO back when the subnet is deregistered.
13114
13115```bash
13116btcli subnet create --subtensor.network test 
13117```
13118
13119Enter the owner wallet name which gives permissions to the coldkey:
13120
13121```bash
13122>> Enter wallet name (default): owner # Enter your owner wallet name
13123>> Enter password to unlock key: # Enter your wallet password.
13124>> Register subnet? [y/n]: <y/n> # Select yes (y)
13125>> ⠇ 📡 Registering subnet...
13126✅ Registered subnetwork with netuid: 1 # Your subnet netuid will show here, save this for later.
13127```
13128
13129## 6. Register keys
13130
13131This step registers your subnet validator and subnet miner keys to the subnet, giving them the **first two slots** on the subnet.
13132
13133Register your miner key to the subnet:
13134
13135```bash
13136btcli subnet recycle_register --netuid 171 --subtensor.network test --wallet.name miner --wallet.hotkey default
13137```
13138
13139Follow the below prompts:
13140
13141```bash
13142>> Enter netuid [1] (1): # Enter the netuid of the subnet you just created.
13143>> Continue Registration?
13144  hotkey:     ...
13145  coldkey:    ...
13146  network:    finney [y/n]: # Select yes (y)
13147>> ✅ Registered
13148```
13149
13150Next, register your validator key to the subnet:
13151
13152```bash
13153btcli subnet recycle_register --netuid 171 --subtensor.network test --wallet.name validator --wallet.hotkey default
13154```
13155
13156Follow the prompts:
13157
13158```bash
13159>> Enter netuid [171] (171): # Enter the netuid of the subnet you just created.
13160>> Continue Registration?
13161  hotkey:     ...
13162  coldkey:    ...
13163  network:    finney [y/n]: # Select yes (y)
13164>> ✅ Registered
13165```
13166
13167## 7. Check that your keys have been registered
13168
13169This step returns information about your registered keys.
13170
13171Check that your validator key has been registered:
13172
13173```bash
13174btcli wallet overview --wallet.name validator --subtensor.network test
13175```
13176
13177The above command will display the below:
13178
13179```bash
13180Subnet: 1                                                                                                                                                                
13181COLDKEY  HOTKEY   UID  ACTIVE  STAKE(τ)     RANK    TRUST  CONSENSUS  INCENTIVE  DIVIDENDS  EMISSION(ρ)   VTRUST  VPERMIT  UPDATED  AXON  HOTKEY_SS58                    
13182miner    default  0      True   0.00000  0.00000  0.00000    0.00000    0.00000    0.00000            0  0.00000                14  none  5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
131831        1        2            τ0.00000  0.00000  0.00000    0.00000    0.00000    0.00000           ρ0  0.00000                                                         
13184                                                                          Wallet balance: τ0.0         
13185```
13186
13187Check that your miner has been registered:
13188
13189```bash
13190btcli wallet overview --wallet.name miner --subtensor.network test
13191```
13192
13193The above command will display the below:
13194
13195```bash
13196Subnet: 1                                                                                                                                                                
13197COLDKEY  HOTKEY   UID  ACTIVE  STAKE(τ)     RANK    TRUST  CONSENSUS  INCENTIVE  DIVIDENDS  EMISSION(ρ)   VTRUST  VPERMIT  UPDATED  AXON  HOTKEY_SS58                    
13198miner    default  1      True   0.00000  0.00000  0.00000    0.00000    0.00000    0.00000            0  0.00000                14  none  5GTFrsEQfvTsh3WjiEVFeKzFTc2xcf…
131991        1        2            τ0.00000  0.00000  0.00000    0.00000    0.00000    0.00000           ρ0  0.00000                                                         
13200                                                                          Wallet balance: τ0.0   
13201```
13202
13203## 8. Run subnet miner and subnet validator
13204
13205Run the subnet miner:
13206
13207```bash
13208python neurons/miner.py --netuid 171 --subtensor.network test --wallet.name miner --wallet.hotkey default --logging.debug
13209```
13210
13211You will see the below terminal output:
13212
13213```bash
13214>> 2023-08-08 16:58:11.223 |       INFO       | Running miner for subnet: 171 on network: ws://127.0.0.1:9946 with config: ...
13215```
13216
13217Next, run the subnet validator:
13218
13219```bash
13220python neurons/validator.py --netuid 171 --subtensor.network test --wallet.name validator --wallet.hotkey default --logging.debug
13221```
13222
13223You will see the below terminal output:
13224
13225```bash
13226>> 2023-08-08 16:58:11.223 |       INFO       | Running validator for subnet: 171 on network: ws://127.0.0.1:9946 with config: ...
13227```
13228
13229
13230## 9. Get emissions flowing
13231
13232Register to the root network using the `btcli`:
13233
13234```bash
13235btcli root register --subtensor.network test
13236```
13237
13238Then set your weights for the subnet:
13239
13240```bash
13241btcli root weights --subtensor.network test
13242```
13243
13244## 10. Stopping your nodes
13245
13246To stop your nodes, press CTRL + C in the terminal where the nodes are running.
13247
13248
13249
13250---
13251File: /docs/vscode_completion.md
13252---
13253
13254# Getting Code Completion in VSCODE
13255
13256## Installing the extension
13257
132581. Open VS Code.
132592. Press `Ctrl+Shift+X` to open the Extensions view.
132603. Search for `Continue.dev` and install it.
132614. Restart VS Code.
13262
13263## Configuring the extension
13264
132651. Press `Ctrl+Shift+P` to open the Command Palette.
132662. Type `Continue.dev: Open config.json` and press `Enter`.
132673. This will open the `config.json` file in your workspace.
13268
13269Now add the following configuration:
13270# TODO FINISH the below
13271```json
13272"models": [
13273    {
13274      "title": "Code",
13275      "model": "code",
13276      "contextLength": 8000,
13277      "provider": "openai",
13278      "apiKey": "EMPTY",
13279      "apiBase": "http://0.0.0.0:8000/v1"
13280
13281    }
13282  ],
13283"tabAutocompleteModel": {
13284    "title": "Code",
13285    "model": "code",
13286    "contextLength": 8000,
13287    "provider": "openai",
13288    "apiKey": "EMPTY",
13289    "apiBase": "http://0.0.0.0:8000/v1"
13290  },
13291```
13292
13293
13294
13295---
13296File: /neurons/__init__.py
13297---
13298
13299
13300
13301
13302---
13303File: /neurons/miner.py
13304---
13305
13306# The MIT License (MIT)
13307# Copyright © 2023 Yuma Rao
13308# Copyright © 2024 Broke
13309
13310# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
13311# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
13312# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
13313# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
13314
13315# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
13316# the Software.
13317
13318# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
13319# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
13320# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
13321# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
13322# DEALINGS IN THE SOFTWARE.
13323
13324import time
13325import typing
13326import traceback
13327import importlib
13328import bittensor as bt
13329
13330from typing import Awaitable
13331
13332# Bittensor Miner Template:
13333import coding
13334
13335# import base miner class which takes care of most of the boilerplate
13336from coding.base.miner import BaseMinerNeuron
13337from coding.utils.config import config as util_config
13338from coding.protocol import StreamCodeSynapse, LogicSynapse
13339from coding.miners.swe import miner_process as miner_process_swe
13340
class Miner(BaseMinerNeuron):
    """
    Miner neuron for the coding subnet.

    The request-handling logic is pluggable: ``__init__`` imports the module
    ``coding.miners.<config.miner.name>_miner`` and stores its ``miner_init``
    and ``miner_process`` callables on the instance. ``forward`` delegates to
    that ``miner_process``; ``forward_swe`` delegates to the ``miner_process``
    imported from ``coding.miners.swe``.

    ``blacklist`` and ``priority`` decide which callers are served and in what
    order; the ``*_swe`` variants reuse them unchanged for ``LogicSynapse``
    requests.
    """

    # Hotkey that always passes the blacklist (reason: "Subnet owner hotkey").
    _SUBNET_OWNER_HOTKEY = "5Fy7c6skhxBifdPPEs3TyytxFc7Rq6UdLqysNPZ5AMAUbRQx"

    def __init__(self, config=None):
        """
        Build the miner and load the configured miner implementation.

        Args:
            config: Optional configuration object. When falsy, one is created
                with ``util_config(self)``.
        """
        if not config:
            config = util_config(self)
        # Assigned before super().__init__() on purpose: the original code sets
        # it first, presumably because the base class reads it while wiring up
        # the axon -- keep this ordering.
        self.forward_capabilities = [
            {
                'forward': self.forward,
                'blacklist': self.blacklist,
                'priority': self.priority,
            },
            {
                'forward': self.forward_swe,
                'blacklist': self.blacklist_swe,
                'priority': self.priority_swe,
            },
        ]
        super().__init__(config=config)

        # Resolve the miner implementation selected in the config.
        miner_name = f"coding.miners.{config.miner.name}_miner"
        miner_module = importlib.import_module(miner_name)

        self.miner_init = miner_module.miner_init
        self.miner_process = miner_module.miner_process

        self.miner_init(self)

    async def forward_swe(
        self, synapse: LogicSynapse
    ) -> LogicSynapse:
        """Handle a ``LogicSynapse`` by delegating to ``coding.miners.swe.miner_process``."""
        return miner_process_swe(self, synapse)

    async def blacklist_swe(
        self, synapse: LogicSynapse
    ) -> typing.Tuple[bool, str]:
        """Apply the same blacklist rules as :meth:`blacklist` to a ``LogicSynapse``."""
        return await self.blacklist(synapse)

    async def priority_swe(
        self, synapse: LogicSynapse
    ) -> float:
        """Apply the same priority rules as :meth:`priority` to a ``LogicSynapse``."""
        return await self.priority(synapse)

    def forward(
        self, synapse: StreamCodeSynapse
    ) -> StreamCodeSynapse:
        """
        Process an incoming ``StreamCodeSynapse`` with the configured miner.

        Args:
            synapse: The request to process.

        Returns:
            Whatever the loaded ``miner_process(self, synapse)`` returns.

        Raises:
            Exception: Any error raised by ``miner_process`` is logged with its
                traceback and then re-raised. (Previously the handler fell
                through to ``return response`` with ``response`` unbound, which
                replaced the real error with an ``UnboundLocalError``.)
        """
        try:
            return self.miner_process(self, synapse)
        except Exception:
            bt.logging.error(
                "An error occurred while processing the synapse: "
                f"{traceback.format_exc()}"
            )
            raise

    async def blacklist(
        self, synapse: StreamCodeSynapse
    ) -> typing.Tuple[bool, str]:
        """
        Decide whether an incoming request should be rejected.

        Rules, in order:
        - Requests without a dendrite or hotkey are rejected.
        - The subnet owner hotkey is always accepted.
        - Hotkeys missing from ``self.metagraph.hotkeys`` are rejected unless
          ``config.blacklist.allow_non_registered`` is set.
        - When ``config.blacklist.force_validator_permit`` is set, callers
          without ``metagraph.validator_permit`` are rejected (an unregistered
          caller has no uid and therefore no permit).
        - Everything else is accepted.

        Any unexpected error is logged and the request is rejected.

        Args:
            synapse: The incoming request; only ``synapse.dendrite.hotkey`` is read.

        Returns:
            Tuple[bool, str]: ``(True, reason)`` when the request is
            blacklisted, ``(False, reason)`` when it may proceed.
        """
        try:
            if synapse.dendrite is None or synapse.dendrite.hotkey is None:
                bt.logging.warning("Received a request without a dendrite or hotkey.")
                return True, "Missing dendrite or hotkey"

            hotkey = synapse.dendrite.hotkey
            if hotkey == self._SUBNET_OWNER_HOTKEY:
                return False, "Subnet owner hotkey"

            # Check membership BEFORE looking up the uid: list.index() raises
            # ValueError for an unknown hotkey, which used to make the
            # "Unrecognized hotkey" branch unreachable and silently ignore
            # allow_non_registered.
            hotkeys = self.metagraph.hotkeys
            uid = hotkeys.index(hotkey) if hotkey in hotkeys else None

            if uid is None and not self.config.blacklist.allow_non_registered:
                # Ignore requests from un-registered entities.
                bt.logging.trace(
                    f"Blacklisting un-registered hotkey {hotkey}"
                )
                return True, "Unrecognized hotkey"

            if self.config.blacklist.force_validator_permit:
                # Only validators may query this miner.
                if uid is None or not self.metagraph.validator_permit[uid]:
                    bt.logging.warning(
                        f"Blacklisting a request from non-validator hotkey {hotkey}"
                    )
                    return True, "Non-validator hotkey"

            bt.logging.trace(
                f"Not Blacklisting recognized hotkey {hotkey}"
            )
            return False, "Hotkey recognized!"
        except Exception:
            # Fail closed, but leave a trace of what went wrong.
            bt.logging.error(
                f"Blacklist check failed: {traceback.format_exc()}"
            )
            return True, "Errored out the blacklist function, blacklisting the hotkey"

    async def priority(
        self, synapse: StreamCodeSynapse
    ) -> float:
        """
        Compute the processing priority of an incoming request.

        The priority is the caller's stake, read from ``self.metagraph.S`` at
        the caller's uid; higher values are meant to be processed first.

        Args:
            synapse: The incoming request; only ``synapse.dendrite.hotkey`` is read.

        Returns:
            float: ``0.0`` when the dendrite or hotkey is missing, the caller's
            stake when it can be looked up, and ``1.0`` when the lookup fails
            (for example because the hotkey is not in the metagraph).
        """
        if synapse.dendrite is None or synapse.dendrite.hotkey is None:
            bt.logging.warning("Received a request without a dendrite or hotkey.")
            return 0.0
        try:
            # Get the caller index, then use its stake as the priority.
            caller_uid = self.metagraph.hotkeys.index(synapse.dendrite.hotkey)
            priority = float(self.metagraph.S[caller_uid])
            bt.logging.trace(
                f"Prioritizing {synapse.dendrite.hotkey} with value: {priority}"
            )
            return priority
        except Exception:
            # Unknown caller or malformed metagraph entry: fall back to a
            # small fixed priority, returned as a float to honour the signature.
            return 1.0
13512
13513
# Script entry point: start the miner and emit a heartbeat log line every 5 seconds.
if __name__ == "__main__":
    with Miner() as miner:
        while True:
            now = time.time()
            bt.logging.info(f"Miner running... {now}")
            time.sleep(5)
13520
13521
13522
13523---
13524File: /neurons/validator.py
13525---
13526
13527# The MIT License (MIT)
13528# Copyright © 2023 Yuma Rao
13529# Copyright © 2024 Broke
13530
13531# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
13532# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
13533# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
13534# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
13535
13536# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
13537# the Software.
13538
13539# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
13540# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
13541# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
13542# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
13543# DEALINGS IN THE SOFTWARE.
13544import dotenv
13545
13546dotenv.load_dotenv()
13547
13548import sys
13549import time
13550import random
13551import asyncio
13552import threading
13553
13554import bittensor as bt
13555from typing import Awaitable, Tuple
13556from code_bert_score import BERTScorer
13557from langchain_openai import ChatOpenAI
13558from concurrent.futures import ThreadPoolExecutor
13559from coding.validator import forward
13560from coding.rewards.pipeline import RewardPipeline
13561from coding.protocol import StreamCodeSynapse
13562
13563# import base validator class which takes care of most of the boilerplate
13564from coding.utils.config import config as util_config
13565from coding.base.validator import BaseValidatorNeuron
13566
13567class Validator(BaseValidatorNeuron):
13568    """
13569    Your validator neuron class. You should use this class to define your validator's behavior. In particular, you should replace the forward function with your own logic.
13570
13571    This class inherits from the BaseValidatorNeuron class, which in turn inherits from BaseNeuron. The BaseNeuron class takes care of routine tasks such as setting up wallet, subtensor, metagraph, logging directory, parsing config, etc. You can override any of the methods in BaseNeuron if you need to customize the behavior.
13572
13573    This class provides reasonable default behavior for a validator such as keeping a moving average of the scores of the miners and using them to set weights at the end of each epoch. Additionally, the scores are reset for new hotkeys at the end of each epoch.
13574    """
13575
13576    def __init__(self, config=None):
13577        if not config:
13578            config = util_config(self)
13579        self.finetune_results = {}
13580        super(Validator, self).__init__(config=config)
13581
13582        bt.logging.info("load_state()")
13583        self.load_state()
13584
13585        self.active_tasks = [
13586            task
13587            for task, p in zip(
13588                self.config.neuron.tasks, self.config.neuron.task_weights
13589            )
13590            if p > 0
13591        ]
13592        self.executor = ThreadPoolExecutor()
13593        # Load the reward pipeline
13594        self.reward_pipeline = RewardPipeline(
13595            selected_tasks=self.active_tasks,
13596            device=self.device,
13597            code_scorer=None,
13598        )
13599
13600    def _forward(
13601        self, synapse: StreamCodeSynapse
13602    ) -> (
13603        StreamCodeSynapse
13604    ):  # TODO remove this since its duplicate code, could be handled better
13605        """
13606        forward method that is called when the validator is queried with an axon
13607        """
13608        return forward(self, synapse)
13609        # # response = forward_organic_synapse(self, synapse=synapse)
13610
13611        # def _run():
13612        #     asyncio.run(forward(self, synapse))
13613
13614        # if random.random() < self.config.neuron.percent_organic_score:
13615        #     try:
13616        #         loop = asyncio.get_running_loop()
13617        #         loop.create_task(forward(self, synapse))
13618        #     except RuntimeError:  # No event loop running
13619        #         threading.Thread(target=_run).start()
13620        #     # return the response
13621        # return response
13622
13623    async def forward(self, synapse: StreamCodeSynapse) -> Awaitable:
13624        """
13625        Validator forward pass. Consists of:
13626        - Generating the query
13627        - Querying the miners
13628        - Getting the responses
13629        - Rewarding the miners
13630        - Updating the scores
13631        """
13632        return forward(self, synapse)
13633
13634    # TODO make it so that the only thing accepted is the subnet owners hotkey + the validators coldkey
13635    async def blacklist(self, synapse: StreamCodeSynapse) -> Tuple[bool, str]:
13636        """
13637        Determines whether an incoming request should be blacklisted and thus ignored. Your implementation should
13638        define the logic for blacklisting requests based on your needs and desired security parameters.
13639
13640        Blacklist runs before the synapse data has been deserialized (i.e. before synapse.data is available).
13641        The synapse is instead contructed via the headers of the request. It is important to blacklist
13642        requests before they are deserialized to avoid wasting resources on requests that will be ignored.
13643
13644        Args:
13645            synapse (template.protocol.Dummy): A synapse object constructed from the headers of the incoming request.
13646
13647        Returns:
13648            Tuple[bool, str]: A tuple containing a boolean indicating whether the synapse's hotkey is blacklisted,
13649                            and a string providing the reason for the decision.
13650
13651        This function is a security measure to prevent resource wastage on undesired requests. It should be enhanced
13652        to include checks against the metagraph for entity registration, validator status, and sufficient stake
13653        before deserialization of synapse data to minimize processing overhead.
13654
13655        Example blacklist logic:
13656        - Reject if the hotkey is not a registered entity within the metagraph.
13657        - Consider blacklisting entities that are not validators or have insufficient stake.
13658
13659        In practice it would be wise to blacklist requests from entities that are not validators, or do not have
13660        enough stake. This can be checked via metagraph.S and metagraph.validator_permit. You can always attain
13661        the uid of the sender via a metagraph.hotkeys.index( synapse.dendrite.hotkey ) call.
13662
13663        Otherwise, allow the request to be processed further.
13664        """
13665        if synapse.dendrite.hotkey == "5Fy7c6skhxBifdPPEs3TyytxFc7Rq6UdLqysNPZ5AMAUbRQx":
13666            return False, "Subnet owner hotkey"
13667        return True, "Blacklisted"
13668
13669    async def priority(self, synapse: StreamCodeSynapse) -> float:
13670        """
13671        The priority function determines the order in which requests are handled. More valuable or higher-priority
13672        requests are processed before others. You should design your own priority mechanism with care.
13673
13674        This implementation assigns priority to incoming requests based on the calling entity's stake in the metagraph.
13675
13676        Args:
13677            synapse (template.protocol.Dummy): The synapse object that contains metadata about the incoming request.
13678
13679        Returns:
13680            float: A priority score derived from the stake of the calling entity.
13681
13682        Miners may recieve messages from multiple entities at once. This function determines which request should be
13683        processed first. Higher values indicate that the request should be processed first. Lower values indicate
13684        that the request should be processed later.
13685
13686        Example priority logic:
13687        - A higher stake results in a higher priority value.
13688        """
13689        if synapse.dendrite is None or synapse.dendrite.hotkey is None:
13690            bt.logging.warning("Received a request without a dendrite or hotkey.")
13691            return 0.0
13692
13693        # TODO(developer): Define how miners should prioritize requests.
13694        caller_uid = self.metagraph.hotkeys.index(
13695            synapse.dendrite.hotkey
13696        )  # Get the caller index.
13697        priority = float(
13698            self.metagraph.S[caller_uid]
13699        )  # Return the stake as the priority.
13700        bt.logging.trace(
13701            f"Prioritizing {synapse.dendrite.hotkey} with value: {priority}"
13702        )
13703        return priority
13704
13705
# Entry point: start the validator and keep the parent alive while its worker thread runs.
if __name__ == "__main__":
    with Validator() as validator:
        # Heartbeat every five seconds for as long as the child thread is alive.
        while validator.thread.is_alive():
            bt.logging.info(f"Validator running... {time.time()}")
            time.sleep(5)
        # The loop only ends when the child thread has died, so exit with an error code.
        bt.logging.error("Child thread has exited, terminating parent thread.")
        sys.exit(1)
13715
13716
13717
13718---
13719File: /notebooks/example_submission/diff.py
13720---
13721
13722from difflib import unified_diff
13723from typing import Dict
13724from swebase import Patch, Edit
13725
def create_patch(original_files: Dict[str, str], edited_files: Dict[str, str]) -> Patch:
    """
    Create a Patch object by comparing original and edited file contents.

    Each edited file that also exists in ``original_files`` is diffed line by line with
    ``difflib.unified_diff`` and the diff is translated into ``Edit`` objects:

    * a removed line immediately followed by an added line becomes a modification
      (``line_content`` is the old text, ``new_line_content`` the new text);
    * any other added line becomes an insertion with an empty ``line_content``;
    * a removed line with no directly following addition produces no edit.

    Line numbers are taken from the old-file start of the enclosing hunk header and
    advanced for every context or removed line.

    Args:
        original_files (Dict[str, str]): Dictionary mapping filenames to original file contents
        edited_files (Dict[str, str]): Dictionary mapping filenames to edited file contents

    Returns:
        Patch: Patch object containing the edits
    """
    edits = []

    # Process each edited file
    for filename in edited_files:
        # Files that do not exist in the original set are skipped.
        if filename not in original_files:
            continue

        # Split files into lines
        original_lines = original_files[filename].splitlines()
        edited_lines = edited_files[filename].splitlines()

        # Generate diff
        diff = list(unified_diff(
            original_lines,
            edited_lines,
            lineterm='',
        ))

        print(f"Diff for {filename}:")
        for d in diff:
            print(d)

        # Parse diff to create Edit objects
        line_num = 0
        in_hunk = False  # the '---' / '+++' file headers precede the first hunk
        j = 0
        while j < len(diff):
            line = diff[j]
            if line.startswith('@@'):
                in_hunk = True
                # Format is @@ -start,length +start,length @@
                parts = line.split(' ')
                if len(parts) >= 2:
                    old_range = parts[1]  # Get the -start,length part
                    line_num = int(old_range.split(',')[0][1:])  # Extract start number after '-'
            elif not in_hunk:
                # File header lines carry no content to turn into edits.
                pass
            elif line.startswith('-'):
                # unified_diff prefixes with a single '-' (no trailing space), so strip
                # exactly one character to recover the original line.
                old_content = line[1:]
                # Check if next line is an addition (modification)
                if j + 1 < len(diff) and diff[j + 1].startswith('+'):
                    new_content = diff[j + 1][1:]
                    edits.append(
                        Edit(
                            file_name=filename,
                            line_number=line_num,
                            line_content=old_content,
                            new_line_content=new_content
                        )
                    )
                    j += 1  # Skip the next line since we handled it
                line_num += 1
            elif line.startswith('+'):
                # A new line being added; it does not advance the old-file line counter.
                edits.append(
                    Edit(
                        file_name=filename,
                        line_number=line_num,
                        line_content="",
                        new_line_content=line[1:]
                    )
                )
            else:
                # Context line: present in both versions.
                line_num += 1
            j += 1
    return Patch(edits=edits)
13810
13811
13812---
13813File: /notebooks/example_submission/files.py
13814---
13815
13816import os
13817from typing import List
13818
def load_directory(directory: str) -> dict:
    """
    Read every file under ``directory`` into memory.

    ``__pycache__`` directories are not descended into, and any file whose name contains
    ``__pycache__`` is skipped.

    Args:
        directory (str): Root directory to walk.

    Returns:
        dict: Mapping from each file's path relative to ``directory`` to its text content.
    """
    repo_files = {}

    # Walk through all files in repo path
    for root, dirs, files in os.walk(directory):
        # Prune in place so os.walk never descends into bytecode caches.
        if '__pycache__' in dirs:
            dirs.remove('__pycache__')

        # Get relative path from repo root
        rel_path = os.path.relpath(root, directory)

        for filename in files:
            if '__pycache__' in filename:
                continue

            file_path = os.path.join(root, filename)

            # Files directly in the root are keyed by bare name, without a './' prefix.
            repo_key = filename if rel_path == '.' else os.path.join(rel_path, filename)

            # latin-1 maps every byte to a character, so decoding cannot fail on any file.
            with open(file_path, 'r', encoding='latin-1') as f:
                repo_files[repo_key] = f.read()
    return repo_files
13850
13851
13852---
13853File: /notebooks/example_submission/fix.py
13854---
13855
13856from typing import List, Dict
FIX_PROMPT = """
Given the following file and the issue, rewrite the file to fix the issue. If no issue is found, respond with nothing.

File: {file}

Issue: {issue}
"""


def _extract_code(response: str) -> str:
    """Return the body of the first fenced code block in *response*, or *response* itself."""
    # Prefer an explicit python fence, then fall back to any fence.
    for fence in ("```python", "```"):
        start = response.find(fence)
        if start == -1:
            continue
        start += len(fence)
        end = response.find("```", start)
        # An unterminated fence runs to the end of the text; slicing with -1 would
        # silently drop the last character.
        return response[start:] if end == -1 else response[start:end]
    return response


def fix(files: Dict[str, str], file_names: List[str], issue: str, llm) -> Dict[str, str]:
    """
    Ask the LLM to rewrite each named file so that it addresses ``issue``.

    Args:
        files: Mapping of file name to current file content.
        file_names: Names of the files to rewrite; names missing from ``files`` are skipped.
        issue: Description of the problem to fix.
        llm: Callable invoked as ``llm(prompt, "gpt-4o")`` that returns a
            ``(response_text, _)`` pair.

    Returns:
        Mapping of file name to rewritten content, containing only files for which the
        model produced non-empty output.
    """
    fixed_files = {}
    for file_name in file_names:
        # The names may come from a model and need not exist in the repository.
        if file_name not in files:
            continue

        prompt = FIX_PROMPT.format(file=files[file_name], issue=issue)
        response, _ = llm(prompt, "gpt-4o")

        # Strip before testing so a whitespace-only answer is treated as "nothing".
        rewritten = _extract_code(response).strip()
        if rewritten:
            fixed_files[file_name] = rewritten

    return fixed_files
13886
13887
13888---
13889File: /notebooks/example_submission/search.py
13890---
13891
13892import ast
13893from typing import List
13894
SEARCH_PROMPT = """
Given the following file names, find the file that contains the code that is relevant to the issue.

{file_names}

Issue: {issue}

Your response should be a python list of file names.
"""


def _strip_code_fence(response: str) -> str:
    """Return the body of the first fenced code block in *response*, or *response* itself."""
    for fence in ("```python", "```"):
        start = response.find(fence)
        if start == -1:
            continue
        start += len(fence)
        end = response.find("```", start)
        # An unterminated fence runs to the end of the text.
        return response[start:] if end == -1 else response[start:end]
    return response


def search(file_names: List[str], issue: str, llm) -> List[str]:
    """
    Ask the LLM which of ``file_names`` are relevant to ``issue``.

    Args:
        file_names: Candidate file names shown to the model.
        issue: Description of the problem.
        llm: Callable invoked as ``llm(prompt, "gpt-4o")`` that returns a
            ``(response_text, _)`` pair.

    Returns:
        The file names parsed from the model's answer. The answer is parsed as a Python
        literal when possible (a non-list literal is wrapped in a list); otherwise it is
        split on commas after removing brackets and quotes.
    """
    prompt = SEARCH_PROMPT.format(file_names=file_names, issue=issue)
    response, _ = llm(prompt, "gpt-4o")

    # Clean and parse the response
    response = _strip_code_fence(response).strip()
    try:
        # literal_eval only accepts literals, so model output is never executed.
        files = ast.literal_eval(response)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Fallback to basic string parsing when the answer is not a valid literal.
        files = response.replace("[", "").replace("]", "").replace("'", "").replace("\"", "").split(",")
        files = [f.strip() for f in files if f.strip()]
    else:
        if not isinstance(files, list):
            files = [files]

    return files
13933
13934
13935---
13936File: /notebooks/example_submission/submission.py
13937---
13938
13939from fix import fix
13940from search import search
13941from diff import create_patch
13942from files import load_directory
13943from swebase import SWEBase, Patch
13944
13945
class SWE(SWEBase):
    """Example submission: locate the relevant files, rewrite them, and diff the result."""

    def __call__(self, repo_location: str, issue_description: str) -> Patch:
        """
        Produce a patch for ``issue_description`` against the repository at ``repo_location``.

        Args:
            repo_location: Path of the repository directory to load.
            issue_description: Text describing the issue to fix.

        Returns:
            Patch: The edits obtained by diffing the rewritten files against the originals.
        """
        # Load first: the search step needs the repository's file names, not its path.
        print(f"Loading files from directory: {repo_location}")
        files = load_directory(repo_location)
        print(f"Loaded {len(files)} files")

        print(f"Searching for relevant files for issue: {issue_description}")
        file_names = search(list(files), issue_description, self.llm)
        # The model may return names that are not in the repository; drop them so the
        # fix step only receives files that were actually loaded.
        file_names = [name for name in file_names if name in files]
        print(f"Found relevant files: {file_names}")

        print("Fixing files...")
        fixed_files = fix(files, file_names, issue_description, self.llm)
        print(f"Fixed {len(fixed_files)} files")

        print("Creating patch...")
        patch = create_patch(files, fixed_files)
        print("Patch created")
        return patch
13964
13965
13966
13967---
13968File: /notebooks/example_submission/swebase.py
13969---
13970
13971import os
13972import requests
13973from pydantic import BaseModel
13974from abc import ABC, abstractmethod
13975from langchain_openai import ChatOpenAI
13976
class Edit(BaseModel):
    """A single-line change to one file."""
    # Name of the file the edit applies to.
    file_name: str
    # Line number the edit refers to (presumably in the original file; confirm the
    # base index against the code that applies patches).
    line_number: int
    # Existing line text; create_patch passes "" here for pure insertions.
    line_content: str
    # Text that replaces, or is inserted at, the line.
    new_line_content: str
13982
class Patch(BaseModel):
    """A collection of Edit objects, as returned by a submission."""
    # The individual line edits; ordering semantics are not defined in this file.
    edits: list[Edit]
13985
class LLMClient:
    """
    Client for text generation and embeddings.

    Talks to an HTTP API server when one answers at ``base_url``; otherwise falls back
    to local ``langchain_openai`` models.
    """

    def __init__(
        self,
        base_url: str = f"http://{os.getenv('HOST_IP', 'localhost')}:25000",
        probe_timeout: float = 5.0,
    ):
        """
        Initialize LLM client with API server URL.

        Args:
            base_url (str): Root URL of the API server.
            probe_timeout (float): Seconds to wait for the connectivity probe before
                falling back to local models.
        """
        self.base_url = base_url.rstrip("/")
        self.use_server = True
        # Cache of local ChatOpenAI instances keyed by model name; created up front so
        # the attribute exists regardless of which backend is selected.
        self.chat_models = {}
        try:
            # Test connection to server. Without a timeout an unresponsive host would
            # block construction indefinitely instead of triggering the fallback.
            requests.get(self.base_url, timeout=probe_timeout)
        except requests.exceptions.RequestException:
            # If server not available, fall back to local ChatOpenAI
            self.use_server = False

    def __call__(self, query: str, llm_name: str) -> tuple[str, int]:
        """
        Call LLM API endpoint or local ChatOpenAI

        Args:
            query (str): The prompt/query to send to the LLM
            llm_name (str): Name of LLM model to use (e.g. "gpt-4", "claude-3-sonnet")

        Returns:
            tuple[str, int]: (Generated response text, Total tokens used for this key).
            The token count is -1 when the local fallback is used.

        Raises:
            requests.exceptions.RequestException: If API call fails when using server
        """
        if self.use_server:
            payload = {"query": query, "llm_name": llm_name}
            response = requests.post(f"{self.base_url}/call", json=payload)
            response.raise_for_status()
            result = response.json()
            return result["result"], result["total_tokens"]

        # Use local ChatOpenAI, creating each model once and reusing it afterwards.
        if llm_name not in self.chat_models:
            self.chat_models[llm_name] = ChatOpenAI(model_name=llm_name)
        response = self.chat_models[llm_name].invoke(query)
        # No token count is read from the local response, so report -1.
        return response.content, -1

    def embed(self, query: str) -> list[float]:
        """
        Get embeddings for text using the embedding API endpoint or local embeddings

        Args:
            query (str): The text to get embeddings for

        Returns:
            list[float]: Vector embedding of the input text

        Raises:
            requests.exceptions.RequestException: If API call fails when using server
        """
        if self.use_server:
            payload = {"query": query}
            response = requests.post(f"{self.base_url}/embed", json=payload)
            response.raise_for_status()
            result = response.json()
            return result["vector"]

        # Use local embeddings; imported lazily so it is only needed on this path.
        from langchain_openai import OpenAIEmbeddings
        embeddings = OpenAIEmbeddings()
        return embeddings.embed_query(query)
14052        
class SWEBase(ABC):
    """Abstract base for submissions; gives every instance an ``LLMClient`` as ``self.llm``."""

    def __init__(self):
        self.llm = LLMClient()

    @abstractmethod
    def __call__(self, repo_location: str, issue_description: str) -> Patch:
        """Return a Patch for the given repository location and issue description."""
14060
14061
14062
14063---
14064File: /scripts/check_compatibility.sh
14065---
14066
#!/bin/bash

# Usage: check_compatibility.sh <python-version>
# The Python version to check against is mandatory.
if [[ -z "$1" ]]; then
    echo "Please provide a Python version as an argument."
    exit 1
fi

python_version="$1"
all_passed=true

# ANSI escape sequences used with `echo -e` to colour the report.
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
RED='\033[0;31m'
NC='\033[0m' # No Color
14081
# Check every requirement in requirements.txt against PyPI metadata for $python_version.
# Returns 0 when no requirement was found incompatible, 1 otherwise.
check_compatibility() {
    all_supported=0
    # Escape dots so the version is matched literally by grep -E below.
    escaped_version="${python_version//./\\.}"

    # `|| [ -n ... ]` keeps the last requirement when the file has no trailing newline.
    while read -r requirement || [ -n "$requirement" ]; do
        # Skip blank lines, comments, and lines starting with git+
        if [[ -z "$requirement" || "$requirement" == \#* || "$requirement" == git+* ]]; then
            continue
        fi

        # Strip version specifiers (including ~=), environment markers and extras brackets.
        package_name=$(echo "$requirement" | awk -F'[!=<>~; ]' '{print $1}' | awk -F'[' '{print $1}')
        echo -n "Checking $package_name... "

        url="https://pypi.org/pypi/$package_name/json"
        # One request returns the body followed by the HTTP status on its own last line.
        response=$(curl -s -w '\n%{http_code}' "$url")
        status_code="${response##*$'\n'}"
        response="${response%$'\n'*}"

        if [ "$status_code" != "200" ]; then
            echo -e "${RED}Information not available for $package_name. Failure.${NC}"
            all_supported=1
            continue
        fi

        classifiers=$(echo "$response" | jq -r '.info.classifiers[]')
        requires_python=$(echo "$response" | jq -r '.info.requires_python')

        base_version="Programming Language :: Python :: ${python_version%%.*}"
        specific_version="Programming Language :: Python :: $python_version"

        # -F: the classifier strings are fixed text, not patterns.
        if echo "$classifiers" | grep -qF "$specific_version" || echo "$classifiers" | grep -qF "$base_version"; then
            echo -e "${GREEN}Supported${NC}"
        elif [ "$requires_python" != "null" ]; then
            # NOTE: this only recognises specifiers that literally name $python_version.
            if echo "$requires_python" | grep -Eq "==$escaped_version|>=$escaped_version|<=$escaped_version"; then
                echo -e "${GREEN}Supported${NC}"
            else
                echo -e "${RED}Not compatible with Python $python_version due to constraint $requires_python.${NC}"
                all_supported=1
            fi
        else
            echo -e "${YELLOW}Warning: Specific version not listed, assuming compatibility${NC}"
        fi
    done < requirements.txt

    return $all_supported
}
14126
echo "Checking compatibility for Python $python_version..."
# Branch directly on the function's exit status: 0 means nothing was found incompatible.
if check_compatibility; then
    echo -e "${GREEN}All requirements are compatible with Python $python_version.${NC}"
else
    echo -e "${RED}All requirements are NOT compatible with Python $python_version.${NC}"
    all_passed=false
fi

echo ""
# Final summary; a failure is propagated through the script's exit code.
if $all_passed; then
    echo -e "${GREEN}All tests passed.${NC}"
else
    echo -e "${RED}All tests did not pass.${NC}"
    exit 1
fi
14143
14144
14145
14146---
14147File: /scripts/check_requirements_changes.sh
14148---
14149
#!/bin/bash

# Check if requirements files have changed in the last commit and record the result
# as REQUIREMENTS_CHANGED in the file named by $BASH_ENV.
# The dot is escaped so it matches literally; matching paths are printed by grep.
if git diff --name-only HEAD~1 | grep -E 'requirements\.txt'; then
    echo "Requirements files have changed. Running compatibility checks..."
    echo 'export REQUIREMENTS_CHANGED="true"' >> "$BASH_ENV"
else
    echo "Requirements files have not changed. Skipping compatibility checks..."
    echo 'export REQUIREMENTS_CHANGED="false"' >> "$BASH_ENV"
fi
14160
14161
14162
14163---
14164File: /scripts/docker-firewall.py
14165---
14166
14167import docker
14168import subprocess
14169import time
14170
def run_command(command):
    """Execute *command* through the shell and return its stripped standard output.

    A non-zero exit status is reported on stdout together with the command's stripped
    standard error; the captured output is returned either way.
    """
    completed = subprocess.run(
        command,
        shell=True,
        text=True,
        capture_output=True,
    )
    failed = completed.returncode != 0
    if failed:
        print(f"Error running command '{command}': {completed.stderr.strip()}")
    return completed.stdout.strip()
14177
def get_container_ip(container):
    """Get the IP address of a container.

    Reads ``container.attrs['NetworkSettings']``. The top-level ``IPAddress`` is used
    when it is non-empty; otherwise the first non-empty ``IPAddress`` among the entries
    of ``Networks`` is returned, which covers containers attached only to user-defined
    networks.

    Returns:
        The IP address string, or None when no address is available.
    """
    try:
        settings = container.attrs['NetworkSettings']
    except KeyError:
        return None

    ip = settings.get('IPAddress')
    if ip:
        return ip

    # Fall back to the per-network entries.
    for network in (settings.get('Networks') or {}).values():
        candidate = (network or {}).get('IPAddress')
        if candidate:
            return candidate
    return None
14184
def add_iptables_rule(ip):
    """Add iptables rules to restrict a container's traffic to port 25000.

    Appends two FORWARD rules for source address *ip*: accept TCP traffic to port
    25000, then drop everything else. Nothing is added when the accept rule is
    already present.
    """
    # `iptables -C` reports whether a rule exists through its exit status and prints
    # nothing on success, so the return code (not stdout) has to be inspected.
    check = subprocess.run(
        f"iptables -C FORWARD -s {ip} -p tcp --dport 25000 -j ACCEPT",
        shell=True,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    if check.returncode == 0:
        return  # Rule already exists

    # Add the rules
    run_command(f"iptables -A FORWARD -s {ip} -p tcp --dport 25000 -j ACCEPT")
    run_command(f"iptables -A FORWARD -s {ip} -j DROP")
    print(f"Added iptables rules for IP: {ip}")
14196
def monitor_containers():
    """Poll Docker every five seconds and keep iptables rules in sync with "swe" containers.

    Containers whose name contains "swe" get rules added the first time their IP is
    seen; rules are deleted again once no such container reports that IP. Errors in a
    polling round are printed and the loop continues.
    """
    docker_client = docker.from_env()
    applied_ips = set()

    while True:
        try:
            running = docker_client.containers.list()

            # Add rules for every matching container whose IP has not been handled yet.
            for container in running:
                if "swe" not in container.name:
                    continue
                ip = get_container_ip(container)
                if ip and ip not in applied_ips:
                    add_iptables_rule(ip)
                    applied_ips.add(ip)

            # Clean up rules for IPs that no longer belong to a running container.
            active_ips = {get_container_ip(c) for c in running if "swe" in c.name}
            for ip in applied_ips - active_ips:
                run_command(f"iptables -D FORWARD -s {ip} -p tcp --dport 25000 -j ACCEPT")
                run_command(f"iptables -D FORWARD -s {ip} -j DROP")
                print(f"Removed iptables rules for IP: {ip}")
                applied_ips.remove(ip)

        except Exception as e:
            print(f"Error: {e}")

        time.sleep(5)  # Check every 5 seconds


if __name__ == "__main__":
    monitor_containers()
14228
14229
14230
14231---
14232File: /scripts/install_staging.sh
14233---
14234
14235#!/bin/bash
14236
14237# Section 1: Build/Install
14238# This section is for first-time setup and installations.
14239
# Install the system packages and the Rust toolchain needed for the build.
install_dependencies() {
    # macOS: make sure Homebrew exists, then install packages through it
    install_mac() {
        if ! which brew > /dev/null; then
            echo "Installing Homebrew..."
            /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
        fi
        echo "Updating Homebrew packages..."
        brew update
        echo "Installing required packages..."
        brew install make llvm curl libssl protobuf tmux
    }

    # Ubuntu/Debian: install packages through apt
    install_ubuntu() {
        echo "Updating system packages..."
        sudo apt update
        echo "Installing required packages..."
        sudo apt install --assume-yes make build-essential git clang curl libssl-dev llvm libudev-dev protobuf-compiler tmux
    }

    # Dispatch on the operating system reported by the shell
    if [[ "$OSTYPE" == "darwin"* ]]; then
        install_mac
    elif [[ "$OSTYPE" == "linux-gnu"* ]]; then
        install_ubuntu
    else
        echo "Unsupported operating system."
        exit 1
    fi

    # Install rust and cargo
    curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh

    # Make cargo available in the current shell
    source "$HOME/.cargo/env"
}
14278
# Run install_dependencies only once: the marker file records a completed first run
if [[ ! -f ".dependencies_installed" ]]; then
    install_dependencies
    touch .dependencies_installed
fi


# Section 2: Test/Run
# This section is for running and testing the setup.

# Name of the owner wallet; the first script argument overrides the default "owner"
wallet=${1:-owner}
14291
14292# Logic for setting up and running the environment
# Clone/update subtensor, install the subnet template package, and create the wallets.
setup_environment() {
    # Clone subtensor and enter the directory
    if [ ! -d "subtensor" ]; then
        git clone https://github.com/opentensor/subtensor.git
    fi
    # Stop if the directory is missing so the following commands never run in the wrong place.
    cd subtensor || { echo "Failed to enter subtensor directory."; exit 1; }
    git pull

    # Update to the nightly version of rust
    ./scripts/init.sh

    cd ../bittensor-subnet-template || { echo "Failed to enter bittensor-subnet-template directory."; exit 1; }

    # Install the bittensor-subnet-template python package
    python -m pip install -e .

    # Create and set up wallets
    # This section can be skipped if wallets are already set up
    if [ ! -f ".wallets_setup" ]; then
        # Quoted so a wallet name passed on the command line is always one argument.
        btcli wallet new_coldkey --wallet.name "$wallet" --no_password --no_prompt
        btcli wallet new_coldkey --wallet.name miner --no_password --no_prompt
        btcli wallet new_hotkey --wallet.name miner --wallet.hotkey default --no_prompt
        btcli wallet new_coldkey --wallet.name validator --no_password --no_prompt
        btcli wallet new_hotkey --wallet.name validator --wallet.hotkey default --no_prompt
        touch .wallets_setup
    fi

}

# Call setup_environment every time
setup_environment
14324
## Setup localnet
# assumes we are in the bittensor-subnet-template/ directory
# Initialize your local subtensor chain in development mode. This command will set up and run a local subtensor network.
cd ../subtensor

# Write the launcher script. Use '>' rather than '>>' so re-running this script does not
# append another launch line to the one left by a previous run.
echo "FEATURES='pow-faucet runtime-benchmarks' BT_DEFAULT_TOKEN_WALLET=$(cat ~/.bittensor/wallets/"$wallet"/coldkeypub.txt | grep -oP '"ss58Address": "\K[^"]+') bash scripts/localnet.sh" > setup_and_run.sh
chmod +x setup_and_run.sh
# Start a detached tmux session and run the launcher inside it
tmux new-session -d -s localnet -n 'localnet'
tmux send-keys -t localnet 'bash ../subtensor/setup_and_run.sh' C-m

# Notify the user
echo ">> localnet.sh is running in a detached tmux session named 'localnet'"
echo ">> You can attach to this session with: tmux attach-session -t localnet"

# Register a subnet (this needs to be run each time we start a new local chain)
btcli subnet create --wallet.name "$wallet" --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt

# Transfer tokens to miner and validator coldkeys
export BT_MINER_TOKEN_WALLET=$(cat ~/.bittensor/wallets/miner/coldkeypub.txt | grep -oP '"ss58Address": "\K[^"]+')
export BT_VALIDATOR_TOKEN_WALLET=$(cat ~/.bittensor/wallets/validator/coldkeypub.txt | grep -oP '"ss58Address": "\K[^"]+')

btcli wallet transfer --subtensor.network ws://127.0.0.1:9946 --wallet.name "$wallet" --dest "$BT_MINER_TOKEN_WALLET" --amount 1000 --no_prompt
btcli wallet transfer --subtensor.network ws://127.0.0.1:9946 --wallet.name "$wallet" --dest "$BT_VALIDATOR_TOKEN_WALLET" --amount 10000 --no_prompt

# Register wallet hotkeys to subnet
btcli subnet register --wallet.name miner --netuid 1 --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt
btcli subnet register --wallet.name validator --netuid 1 --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt

# Add stake to the validator
btcli stake add --wallet.name validator --wallet.hotkey default --subtensor.chain_endpoint ws://127.0.0.1:9946 --amount 10000 --no_prompt

# Ensure both the miner and validator keys are successfully registered.
btcli subnet list --subtensor.chain_endpoint ws://127.0.0.1:9946
btcli wallet overview --wallet.name validator --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt
btcli wallet overview --wallet.name miner --subtensor.chain_endpoint ws://127.0.0.1:9946 --no_prompt

cd ../bittensor-subnet-template
14363
14364
# Check if inside a tmux session
if [ -z "$TMUX" ]; then
    # Start a new tmux session and run the miner in the first pane
    tmux new-session -d -s bittensor -n 'miner' 'python neurons/miner.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name miner --wallet.hotkey default --logging.debug'
    
    # Split the window and run the validator in the new pane
    tmux split-window -h -t bittensor:miner 'python neurons/validator.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name validator --wallet.hotkey default --logging.debug'
    
    # Attach to the new tmux session
    tmux attach-session -t bittensor
else
    # If already in a tmux session, create two panes in the current window
    tmux split-window -h 'python neurons/miner.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name miner --wallet.hotkey default --logging.debug'
    # Same validator command as in the branch above (the flag is --wallet.name).
    tmux split-window -v -t 0 'python neurons/validator.py --netuid 1 --subtensor.chain_endpoint ws://127.0.0.1:9946 --wallet.name validator --wallet.hotkey default --logging.debug'
fi
14380
14381
14382
14383---
14384File: /scripts/start_validator.py
14385---
14386
14387"""
14388This script runs a validator process and automatically updates it when a new version is released.
14389Command-line arguments will be forwarded to validator (`neurons/validator.py`), so you can pass
14390them like this:
14391    python3 scripts/start_validator.py --wallet.name=my-wallet
14392Auto-updates are enabled by default and will make sure that the latest version is always running
14393by pulling the latest version from git and upgrading python packages. This is done periodically.
14394Local changes may prevent the update, but they will be preserved.
14395
14396The script will use the same virtual environment as the one used to run it. If you want to run
14397validator within virtual environment, run this auto-update script from the virtual environment.
14398
14399Pm2 is required for this script. This script will start a pm2 process using the name provided by
14400the --pm2_name argument.
14401"""
14402
14403import argparse
14404import logging
14405import subprocess
14406import sys
14407import os
14408import time
14409from datetime import timedelta
14410from shlex import split
14411from typing import List
14412import datetime
14413
# Module-level logger named after this module.
log = logging.getLogger(__name__)
# One minute; presumably the interval between update checks (its use is outside this view).
UPDATES_CHECK_TIME = timedelta(minutes=1)
14416
14417
def get_version() -> str:
    """Return the first eight characters of the current git commit hash.

    Runs ``git rev-parse HEAD`` in the current working directory; a failing git command
    raises ``subprocess.CalledProcessError``.
    """
    completed = subprocess.run(
        ["git", "rev-parse", "HEAD"],
        check=True,
        capture_output=True,
        cwd=os.getcwd(),
    )
    commit_hash = completed.stdout.decode().strip()
    # A full commit hash is expected to be 40 characters long.
    assert len(commit_hash) == 40, f"Invalid commit hash: {commit_hash}"
    return commit_hash[:8]
14429
14430
def start_validator_process(pm2_name: str, args: List[str], current_version: str = "0") -> subprocess.Popen:
    """
    Start ``neurons/validator.py`` under pm2 and return the launching process.

    `sys.executable` is passed to pm2 as the interpreter, which ensures that the same
    python interpreter (and therefore the same virtual environment) is used as the one
    running this auto-updater.

    Args:
        pm2_name: Name under which the process is registered with pm2. An existing pm2
            process with this name is deleted first.
        args: Command-line arguments forwarded to the validator script.
        current_version: Version string used only in the log message.

    Returns:
        The ``Popen`` object of the ``pm2 start`` command, with ``pm2_name`` attached as
        an attribute so the process can be deleted later.
    """
    assert sys.executable, "Failed to get python executable"

    # First check if process already exists and delete it
    try:
        subprocess.run(("pm2", "delete", pm2_name), cwd=os.getcwd(), check=True)
    except subprocess.CalledProcessError:
        # Process doesn't exist, which is fine
        pass

    log.info("Starting validator process with pm2, name: %s", pm2_name)
    process = subprocess.Popen(
        (
            "pm2",
            "start",
            "--interpreter",
            # Use this interpreter rather than whatever "python3" resolves to on PATH.
            sys.executable,
            "--name",
            pm2_name,
            "neurons/validator.py",
            "--",
            *args,
        ),
        cwd=os.getcwd(),
    )
    process.pm2_name = pm2_name
    log.info("Started validator process with pm2, name: %s, version: %s", pm2_name, current_version)

    return process
14465
def stop_validator_process(process: subprocess.Popen) -> None:
    """Delete the pm2 process registered under ``process.pm2_name``.

    Raises ``subprocess.CalledProcessError`` when ``pm2 delete`` fails.
    """
    delete_command = ("pm2", "delete", process.pm2_name)
    subprocess.run(delete_command, cwd=os.getcwd(), check=True)
14469
14470
def pull_latest_version() -> None:
    """
    Fetch and apply the newest upstream commits with `git pull --rebase --autostash`.

    Rebasing (rather than resetting) is deliberate: local modifications are
    replayed on top of origin's changes instead of being overwritten. When the
    pull fails, the error is logged and `git rebase --abort` is run to return
    the repository to its previous state. Conflicts should be rare because the
    validator is expected to be used as-is.

    Raises:
        subprocess.CalledProcessError: if `git rebase --abort` itself fails.
    """
    pull_command = split("git pull --rebase --autostash")
    try:
        subprocess.run(pull_command, cwd=os.getcwd(), check=True)
    except subprocess.CalledProcessError as exc:
        log.error("Failed to pull, reverting: %s", exc)

        abort_command = split("git rebase --abort")
        subprocess.run(abort_command, cwd=os.getcwd(), check=True)
14487
14488
def upgrade_packages() -> None:
    """
    Reinstall python dependencies after the repository has been updated.

    Runs two pip commands with the interpreter that runs this script:
      1. `pip install --use-deprecated=legacy-resolver -r requirements.txt`
      2. `pip install -e .`

    A failing command is logged and otherwise ignored, so the caller always
    proceeds to restart the validator.
    """
    steps = (
        ("Upgrading requirements", ("--use-deprecated=legacy-resolver", "-r", "requirements.txt")),
        ("Upgrading packages", ("-e", ".")),
    )
    for announcement, pip_args in steps:
        log.info(announcement)
        try:
            subprocess.run(
                # Pass an argument list instead of splitting a formatted string:
                # an interpreter path containing spaces would otherwise be
                # broken into several arguments.
                (sys.executable, "-m", "pip", "install", *pip_args),
                check=True,
                cwd=os.getcwd(),
            )
        except subprocess.CalledProcessError as exc:
            log.error("Failed to upgrade packages, proceeding anyway. %s", exc)
14514
14515
def main(pm2_name: str, args: List[str]) -> None:
    """
    Run the validator process and automatically update it when a new version is released.
    This will check for updates every `UPDATES_CHECK_TIME` and update the validator
    if a new version is available. Update is performed as simple `git pull --rebase`.

    Args:
        pm2_name: Name of the pm2 process that runs the validator.
        args: Extra command-line arguments forwarded to the validator.

    The loop runs until the updater is interrupted (e.g. KeyboardInterrupt);
    the validator process is then stopped before this function exits.
    """

    validator = start_validator_process(pm2_name, args)
    current_version = get_version()

    log.info("Current version: %s", current_version)

    try:
        while True:
            try:
                pull_latest_version()
                latest_version = get_version()
                log.info("Latest version: %s", latest_version)

                if latest_version != current_version:
                    log.info(
                        "Upgraded to latest version: %s -> %s",
                        current_version,
                        latest_version,
                    )
                    upgrade_packages()
                    stop_validator_process(validator)
                    validator = start_validator_process(pm2_name, args, latest_version)
                    current_version = latest_version
            except Exception:
                # Keep the updater alive on transient failures (network, git, pm2),
                # but record them. Catching Exception rather than everything lets
                # KeyboardInterrupt/SystemExit reach the `finally` below.
                log.exception("Update check failed, retrying in %s", UPDATES_CHECK_TIME)

            # Sleep after failed checks too, so errors cannot cause a busy loop.
            time.sleep(UPDATES_CHECK_TIME.total_seconds())
    finally:
        stop_validator_process(validator)
14552
14553
if __name__ == "__main__":
    # Emit INFO-and-above records on stdout with a timestamped format.
    stdout_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(
        format="%(asctime)s %(levelname)s %(message)s",
        level=logging.INFO,
        handlers=[stdout_handler],
    )

    cli = argparse.ArgumentParser(
        epilog="Example usage: python start_validator.py --pm2_name 'sn45vali' --wallet_name 'wallet1' --wallet_hotkey 'key123'",
        description="Automatically update and restart the validator process when a new version is released.",
    )
    cli.add_argument("--pm2_name", help="Name of the PM2 process.", default="sn45vali")

    # Options this parser does not know are collected and forwarded to the validator.
    known_flags, validator_args = cli.parse_known_args()

    main(known_flags.pm2_name, validator_args)
14571
14572
14573
14574
14575---
14576File: /verify/generate.py
14577---
14578
14579from substrateinterface import Keypair
14580from os import getenv, environ
14581from datetime import datetime
14582import bittensor
14583
14584# Hardcode or set the environment variable WALLET_PASS to the password for the wallet
14585# environ["WALLET_PASS"] = ""
14586
14587
def main(args):
    """Sign a timestamped message with the wallet's coldkey and save the result.

    Args:
        args: Parsed command-line namespace providing `name` (the wallet name)
            and `message` (the text to sign).

    The signed text is ``"On <local timestamp> <timezone name> <message>"``.
    The message, the signer's ss58 address and the hex-encoded signature are
    printed and written to ``message_and_signature.txt`` in the current
    directory, separated by a newline followed by a tab.
    """
    wallet = bittensor.wallet(name=args.name)
    keypair = wallet.coldkey

    timestamp = datetime.now()
    timezone = timestamp.astimezone().tzname()

    message = f"On {timestamp} {timezone} {args.message}"
    signature = keypair.sign(data=message)

    file_contents = f"{message}\n\tSigned by: {keypair.ss58_address}\n\tSignature: {signature.hex()}"
    print(file_contents)
    # Use a context manager so the file is flushed and closed deterministically.
    with open("message_and_signature.txt", "w") as output_file:
        output_file.write(file_contents)

    print("Signature generated and saved to message_and_signature.txt")
14603
14604
if __name__ == "__main__":
    import argparse

    cli = argparse.ArgumentParser(description="Generate a signature")
    # Both options are optional strings; an omitted option reaches main() as None.
    for flag, help_text in (
        ("--message", "The message to sign"),
        ("--name", "The wallet name"),
    ):
        cli.add_argument(flag, help=help_text, type=str)

    main(cli.parse_args())
14614
14615
14616
14617---
14618File: /verify/verify.py
14619---
14620
14621from substrateinterface import Keypair
14622from binascii import unhexlify
14623
14624
def main(args):
    """Verify the signature stored in a message-and-signature file.

    Args:
        args: Parsed command-line namespace providing `file`, the path of the
            file to check.

    The file is expected to hold three fields separated by a newline followed
    by a tab: the signed message, the signer's ss58 address (optionally
    prefixed with "Signed by: ") and the hex signature (optionally prefixed
    with "Signature: ").

    Raises:
        ValueError: if the file does not contain the three fields, or if the
            signature does not match the message and address.
    """
    with open(args.file) as signed_file:
        file_data = signed_file.read()
    file_split = file_data.split("\n\t")
    if len(file_split) < 3:
        raise ValueError(
            f"Malformed signature file {args.file}: expected message, address and signature fields"
        )

    # The message is the signed payload and must be kept byte-for-byte; the
    # address and signature are stripped so that a trailing newline added by an
    # editor does not break decoding.
    message = file_split[0]
    address_line = file_split[1].strip()
    signature_line = file_split[2].strip()

    address_prefix = "Signed by: "
    if address_line.startswith(address_prefix):
        address = address_line[len(address_prefix) :]
    else:
        address = address_line

    keypair = Keypair(ss58_address=address, ss58_format=42)

    signature_prefix = "Signature: "
    if signature_line.startswith(signature_prefix):
        signature = signature_line[len(signature_prefix) :]
    else:
        signature = signature_line

    real_signature = unhexlify(signature.encode())

    if not keypair.verify(data=message, signature=real_signature):
        raise ValueError(f"Invalid signature for address={address}")
    else:
        print(f"Signature verified, signed by {address}")
14653
14654
if __name__ == "__main__":
    import argparse

    # Single option: the path of the file written by the signature generator.
    cli = argparse.ArgumentParser(description="Verify a signature")
    cli.add_argument(
        "--file",
        help="The file containing the message and signature",
    )
    main(cli.parse_args())
14662
14663
14664
14665---
14666File: /README.md
14667---
14668
14669# **Gen42 - Code Generation on Bittensor** <!-- omit in toc -->
14670
14671<!-- ### Decentralizing Code Generation  -->
14672
14673<!-- [Discord](https://discord.gg/code) • [Network](https://taostats.io/) • [Research](https://bittensor.com/whitepaper) -->
14674
14675<!-- </div> -->
14676
14677<!-- --- -->
14678
14679# Introduction
14680
14681Gen42 leverages the Bittensor network to provide decentralized code generation services. Our focus is on creating robust, scalable tools for code-based Q&A and code completion, powered by open-source large language models.
14682
14683:link:**Useful Links:** <br>
14684
14685- [Gen42 Home](https://www.gen42.ai)
14686- [Gen42 Chat](https://chat.gen42.ai)
14687- [Gen42 API](http://api.gen42.ai)
14688
14689
14690### Products
14691
14692#### Chat App
14693
We provide a chat frontend that allows users to interact with our subnet. The primary offering of this app is code-based Q&A.
14695
14696#### Code Completion
14697<!-- 
14698Code completion has exploded in recent years, tools like [Github Copilot](https://github.com/features/copilot) are extremely popular but lack in some manners.  -->
14699
14700<!-- Our subnet aims to compete with Copilot by offering code completion hosted on Bittensor through [Continue.dev](https://continue.dev/). Unlike Copilot we will not be relying on OpenAI. Our miners will be running open-source code-focused LLMs which have proven to be faster and smarter than the product Copilot uses (GPT Codex).  -->
14701
14702<!-- With an unoptimized miner we have already found that  -->
14703
We provide an OpenAI-compatible API that can be used with [continue.dev](https://continue.dev/). For information on getting started, visit [Gen42](https://www.gen42.ai).
14705
14706---
14707
14708
14709## Mining and Validating
14710
14711#### Validators
14712
14713To get started as a validator, follow the [Validator Quickstart Guide](./docs/validators/quickstart.md).
14714
14715#### Miners
14716
14717To begin mining, refer to the [Miner Quickstart Guide](./docs/miners/quickstart.md).
14718
14719
14720
14721##### Disclaimer
14722
This repo is a fork of Subnet 1, [Prompting](https://github.com/macrocosm-os/prompting/tree/main). Credit for the amazing code goes to them; they did a wonderful job.
14724
14725
14726---
14727File: /setup.py
14728---
14729
14730# The MIT License (MIT)
14731# Copyright © 2023 Yuma Rao
14732# TODO(developer): Set your name
14733# Copyright © 2023 <your name>
14734
14735# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
14736# documentation files (the “Software”), to deal in the Software without restriction, including without limitation
14737# the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
14738# and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
14739
14740# The above copyright notice and this permission notice shall be included in all copies or substantial portions of
14741# the Software.
14742
14743# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
14744# THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
14745# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
14746# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
14747# DEALINGS IN THE SOFTWARE.
14748
14749import re
14750import os
14751import codecs
14752import pathlib
14753from os import path
14754from io import open
14755from setuptools import setup, find_packages
14756from pkg_resources import parse_requirements
14757
14758
def read_requirements(path):
    """Read a pip requirements file and return entries usable as `install_requires`.

    Args:
        path: Location of the requirements file. (Inside this function the
            name shadows the `os.path` module imported at file level.)

    Returns:
        A list of requirement strings. Blank lines and `#` comment lines are
        skipped and surrounding whitespace is removed. VCS links (lines that
        start with "git+" or contain "@") are replaced by the package name
        taken from their `#egg=<name>` fragment; links without such a fragment
        are dropped.
    """
    with open(path, "r") as f:
        lines = f.read().splitlines()

    processed_requirements = []
    for line in lines:
        req = line.strip()
        # Empty strings and comments are not valid requirement specifiers.
        if not req or req.startswith("#"):
            continue

        # For git or other VCS links
        if req.startswith("git+") or "@" in req:
            egg = re.search(r"#egg=([\w\-]+)", req)
            if egg:
                processed_requirements.append(egg.group(1))
            # You may decide to raise an exception here instead,
            # if you want to ensure every VCS link has an #egg=<package_name> at the end
            continue

        processed_requirements.append(req)
    return processed_requirements
14777
14778
# Resolve every input file relative to this script so that the build does not
# depend on the directory it is started from.
here = path.abspath(path.dirname(__file__))
requirements = read_requirements(path.join(here, "requirements.txt"))

with open(path.join(here, "README.md"), encoding="utf-8") as f:
    long_description = f.read()

# Load the version string from coding/__init__.py
with open(os.path.join(here, "coding/__init__.py"), encoding="utf-8") as init_file:
    version_match = re.search(
        r"^__version__ = ['\"]([^'\"]*)['\"]", init_file.read(), re.M
    )
if version_match is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise RuntimeError("Unable to find __version__ in coding/__init__.py")
version_string = version_match.group(1)

setup(
    name="coding",
    version=version_string,
    description="Code Generation Subnet",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/brokespace/code",
    author="brokespace",
    packages=find_packages(),
    include_package_data=True,
    author_email="",
    license="MIT",
    python_requires=">=3.8",
    install_requires=requirements,
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Intended Audience :: Developers",
        "Topic :: Software Development :: Build Tools",
        # Pick your license as you wish
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3 :: Only",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Topic :: Scientific/Engineering",
        "Topic :: Scientific/Engineering :: Mathematics",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Software Development",
        "Topic :: Software Development :: Libraries",
        "Topic :: Software Development :: Libraries :: Python Modules",
    ],
)