MLX SFT 格式转换

去 hf-mirror.org 下载 SFT 格式的数据(.json 文件)。

转换方法:


import json

class sft_data():
    """Convert a JSON SFT dataset into JSONL ``{"text": ...}`` training lines.

    Instantiating the class performs the whole conversion: the input file is
    parsed, every record is rendered into one prompt string, and one JSON
    object per line is written to the output file.

    Attributes:
        data: The parsed content of the input file; it is iterated record by
            record during conversion.
    """

    def __init__(self, file_name, save_name):
        """Read ``file_name`` and write the converted records to ``save_name``.

        Args:
            file_name: Path of a UTF-8 encoded JSON file whose top-level value
                is iterated; each item is passed to ``mlx_train_text``.
            save_name: Path of the JSONL file to write. Opened in ``"w"`` mode,
                so an existing file is truncated.
        """
        # Context managers close both handles even if parsing the input or
        # converting a record raises part-way through.
        with open(file_name, "r", encoding="utf-8") as src:
            self.data = json.load(src)
        with open(save_name, "w", encoding="utf-8") as dst:
            for record in self.data:
                # ensure_ascii=False writes non-ASCII text as-is instead of
                # \uXXXX escapes.
                line = json.dumps(self.mlx_train_text(record), ensure_ascii=False)
                dst.write(line + "\n")

    def mlx_train_text(self, one_dic):
        """Render one record as ``{"text": prompt}``.

        The first three values of ``one_dic`` (in insertion order) are used as
        the question, the reasoning chain and the final answer. The keys are
        never looked at, so records do not have to use the names
        ``Question`` / ``Complex_CoT`` / ``Response`` as long as the fields
        come in that order.

        Args:
            one_dic: Mapping with at least three string values.

        Returns:
            A dict with the single key ``"text"`` holding the assembled prompt.

        Raises:
            IndexError: If ``one_dic`` has fewer than three values.
            TypeError: If one of the three values is not a string.
        """
        values = list(one_dic.values())
        question = values[0]
        complex_cot = values[1]
        response = values[2]
        text = (
            "Please reason step by step:\n\nQuestion:" + question
            + "\n\nLet's solve this step by step:\n" + complex_cot
            + "\n\nFinal Answer:" + response
        )
        return {"text": text}
    
# Run the conversion: constructing sft_data reads the input JSON and writes
# the JSONL output (relative paths resolve against the working directory).
sft_data(file_name="medical_o1_sft_Chinese.json", save_name="mlx_sft2.jsonl")

import json

class sft_data():
    """Convert a JSON SFT dataset into JSONL ``{"text": ...}`` training lines.

    Instantiating the class performs the whole conversion: the input file is
    parsed, every record is rendered into one prompt string, and one JSON
    object per line is written to the output file.

    Attributes:
        data: The parsed content of the input file; it is iterated record by
            record during conversion.
    """

    def __init__(self, file_name, save_name):
        """Read ``file_name`` and write the converted records to ``save_name``.

        Args:
            file_name: Path of a UTF-8 encoded JSON file whose top-level value
                is iterated; each item is passed to ``mlx_train_text``.
            save_name: Path of the JSONL file to write. Opened in ``"w"`` mode,
                so an existing file is truncated.
        """
        # Context managers close both handles even if parsing the input or
        # converting a record raises part-way through.
        with open(file_name, "r", encoding="utf-8") as src:
            self.data = json.load(src)
        # json.dumps keeps its default ensure_ascii=True here, so non-ASCII
        # characters are written as \uXXXX escapes and every line is pure
        # ASCII; the explicit encoding only removes the dependency on the
        # platform default.
        with open(save_name, "w", encoding="utf-8") as dst:
            for record in self.data:
                dst.write(json.dumps(self.mlx_train_text(record)) + "\n")

    def mlx_train_text(self, one_dic):
        """Render one record as ``{"text": prompt}``.

        Args:
            one_dic: Mapping with the string fields ``Question``,
                ``Complex_CoT`` and ``Response``.

        Returns:
            A dict with the single key ``"text"`` holding the assembled prompt.

        Raises:
            KeyError: If one of the three fields is missing.
            TypeError: If one of the three fields is not a string.
        """
        question = one_dic['Question']
        response = one_dic['Response']
        complex_cot = one_dic['Complex_CoT']
        text = (
            "Please reason step by step:\n\nQuestion:" + question
            + "\n\nLet's solve this step by step:\n" + complex_cot
            + "\n\nFinal Answer:" + response
        )
        return {"text": text}
    
# Run the conversion: constructing sft_data reads the input JSON and writes
# the JSONL output (relative paths resolve against the working directory).
sft_data(file_name="medical_o1_sft_Chinese.json", save_name="mlx_sft.jsonl")

MLX 的SFT数据格式

Please reason step by step:
#空行
Question:Question
#空行
Let's solve this step by step:
Complex_CoT
#空行
Final Answer:Response

以 JSONL 的格式保存(每行一个 JSON 对象):

{"text":上面的sft信息}
{"text":上面的sft信息}
{"text":上面的sft信息}
{"text":上面的sft信息}

发表回复