Virtual Assistant
An AI virtual assistant that runs locally with no internet connection.
Functional Requirements
Ideally this will be made with Tauri to have more flexibility with the UI. One idea is to create a separate server.
Non-Functional Requirements
- Local First
- Beautiful UI
- Mobile Friendly
UI Design
The UI is based on several Dribbble designs and Facebook
Wireframes
Kokoro Code
python
import os
from kokoro import KPipeline
from IPython.display import Audio as IPAudio
import soundfile as sf
from pydub import AudioSegment
import os
class Person:
    """Text-to-speech speaker backed by a Kokoro pipeline.

    Attributes:
        language: Kokoro language code -- must match the voice prefix.
        text: The text synthesized by speak().
        voices: Known Kokoro voice identifiers.
        current_voice: Voice used for synthesis (defaults to "af_heart").
        pipeline: The Kokoro KPipeline that generates the audio.
    """

    def __init__(self):
        # 'a' => American English, 'b' => British English
        # 'j' => Japanese: pip install misaki[ja]
        # 'z' => Mandarin Chinese: pip install misaki[zh]
        self.language = "a"
        self.text = "Hello, welcome"
        self.voices = [
            "af",
            "af_bella",
            "af_sky",
            "af_nicole",
            "af_heart",
            "af_sarah",
            "am_adam",
            "am_michael",
            "af_isabella",
            "bm_george",
            "bm_lewis",
            "af_aoede",
            "af_nova",
            "af_alloy",
            "af_jessica",
            "af_river",
            "bm_daniel",
            "bm_fable",
            "bf_lily",
            "bf_alice",  # fixed: was "bg_alice", which is not a valid Kokoro voice id
            "bf_emma",
            "bf_isabella",
            "am_santa",
            "am_onyx",
            "am_liam",
            "am_puck",
            "am_fenrir",
            "am_echo",
            "am_eric",
            "af_kore",
        ]
        # Index 4 is "af_heart"; keep the list order stable so this stays valid.
        self.current_voice = self.voices[4]
        # lang_code must match the voice ('a*' voices need 'a', 'b*' need 'b', ...)
        self.pipeline = KPipeline(lang_code=self.language)

    def speak(self):
        """Synthesize self.text with the current voice.

        Writes one WAV file per text segment into ./files, then merges
        them into a single MP3 via combine_audio_files().
        """
        generator = self.pipeline(
            self.text,
            voice=self.current_voice,  # <= change voice here
            speed=1,
            split_pattern=r"\n+",  # one audio segment per newline-separated chunk
        )
        # exist_ok replaces the old try/except OSError dance around makedirs.
        os.makedirs("./files", exist_ok=True)
        for i, (gs, ps, audio) in enumerate(generator):
            print("-" * 50)
            print(gs)  # gs => graphemes/text (ps => phonemes, unused here)
            # NOTE(review): 24000 assumes Kokoro's 24 kHz output -- confirm.
            sf.write(f"./files/{i}.wav", audio, 24000)  # save each audio file
        combine_audio_files("./files")
def combine_audio_files(directory, output_file="combined_audio.mp3"):
    """Concatenate all audio files in *directory* into a single MP3.

    Args:
        directory: Folder scanned (non-recursively) for .wav/.ogg/.flac files.
        output_file: Name of the MP3 written into the same directory.

    Returns:
        None. Prints the output path, or a notice when no files are found.
    """
    audio_files = [
        f for f in os.listdir(directory) if f.endswith((".wav", ".ogg", ".flac"))
    ]
    if not audio_files:
        print("No audio files found in the directory.")
        return

    def _order(name):
        # speak() writes "0.wav", "1.wav", ...; a plain lexicographic sort
        # would put "10.wav" before "2.wav". Sort numeric stems as integers,
        # and fall back to name order for anything non-numeric.
        stem = os.path.splitext(name)[0]
        return (0, int(stem), name) if stem.isdigit() else (1, 0, name)

    audio_files.sort(key=_order)  # ensure files are combined in playback order
    combined = AudioSegment.empty()
    for audio_file in audio_files:
        audio_path = os.path.join(directory, audio_file)
        combined += AudioSegment.from_file(audio_path)
    # Export the combined audio next to its sources.
    output_path = os.path.join(directory, output_file)
    combined.export(output_path, format="mp3")
    print(f"Combined audio saved as {output_path}")
# Example usage: speak a long marketing blurb with the default voice.
assistant = Person()
assistant.text = "Gain expert insights and strategic guidance to navigate complex technology challenges. We provide tailored advice to help you make informed decisions that drive success.From custom applications to enterprise solutions, we develop robust and scalable software tailored to your needs.Elevate your brand with professional design services, including branding, UI/UX, and marketing visuals that make a lasting impact.We craft user-centric products with expert advice on prototyping, usability, and feature prioritization for maximum market impact. Leverage the power of artificial intelligence to automate processes, enhance decision-making, and drive innovation in your business.Keep your projects on track with structured management solutions, agile methodologies, and efficient team coordination. Visit upskil dot dev today for the best online coding course you've ever taken."
assistant.speak()
Ollama Flet Code
python
from flet import app, Text, TextField, Column, ElevatedButton,Markdown,ScrollMode,Dropdown,dropdown
from ollama import chat
from ollama import ChatResponse
def main(page):
    """Flet chat UI for a local Ollama server.

    Lets the user pick a model from a dropdown, type a prompt, and watch
    the model's reply stream into a scrollable Markdown area.
    """
    first_name = TextField(label="Prompt", autofocus=True)
    # Scrollable answer area; auto_scroll keeps the newest chunk in view.
    greetings = Column(scroll=ScrollMode.AUTO, auto_scroll=True, height=700)
    status = Text("Thinking...")
    # Holds the currently selected model name; read when a request is sent.
    t = Text("llama3.2")

    def dropdown_changed(e):
        # Mirror the dropdown selection into the model-name Text.
        t.value = f"{dd.value}"
        page.update()

    dd = Dropdown(
        width=400,
        on_change=dropdown_changed,
        options=[
            dropdown.Option("llama3.2"),
            dropdown.Option("llama2-uncensored"),
            dropdown.Option("deepseek-r1:8b"),
        ],
    )

    def btn_click(e):
        page.add(status)
        page.update()  # ensure "Thinking..." is displayed before processing
        response: ChatResponse = chat(
            model=t.value,
            messages=[
                {
                    'role': 'user',
                    'content': first_name.value or 'Why is the sky blue?',
                },
            ],
            stream=True,
        )
        page.remove(status)
        # One Markdown widget per answer, updated in place as chunks arrive.
        message_text = Markdown(selectable=True)
        greetings.controls.append(message_text)
        page.update()
        collected_text = ""
        for chunk in response:
            collected_text += chunk['message']['content']
            message_text.value = collected_text  # update text content
            page.update()  # update the UI in real time while streaming
        first_name.value = ""
        page.update()
        first_name.focus()

    page.add(
        t,
        dd,
        first_name,
        ElevatedButton("Ask", on_click=btn_click),
        greetings,
    )


app(target=main)