hosts-go/internal/core/parser.go
phg b81f11f711 feat(parser): Implement hosts file parser with intelligent formatting
- Added `internal/core/parser.go` for parsing hosts files, including:
  - Support for standard entries (IPv4, IPv6, multiple aliases, inline comments)
  - Handling of comments and disabled entries
  - Error recovery for malformed lines with warnings
  - Intelligent formatting with adaptive spacing and column alignment
  - Backup and atomic write operations for file safety

test(parser): Add comprehensive tests for hosts file parsing

- Created `tests/parser_test.go` with 54 test cases covering:
  - Standard entries and comments
  - Malformed lines and whitespace variations
  - Round-trip parsing to ensure format preservation
  - Backup functionality for hosts files

docs(progress): Update project progress and next steps

- Mark Phase 1 as complete and outline tasks for Phase 2 (TUI implementation)
- Highlight completed features and testing coverage
2025-08-13 10:46:39 +02:00

417 lines
10 KiB
Go

package core
import (
"bufio"
"fmt"
"net"
"os"
"path/filepath"
"regexp"
"strings"
"time"
)
// ParseWarning describes a non-fatal problem encountered while parsing.
type ParseWarning struct {
	// Line is the 1-based line number the warning refers to.
	Line int
	// Message is the warning text.
	Message string
}
// FormattingStyle captures the formatting conventions detected in a hosts file.
type FormattingStyle struct {
	// UseTabs reports whether tabs are used for separation.
	UseTabs bool
	// SpacesPerTab is the number of spaces per tab when spaces are used.
	SpacesPerTab int
	// IPWidth is the width used to align the IP column.
	IPWidth int
	// HostWidth is the width used to align the hostname column.
	HostWidth int
	// AlignComments reports whether comments are aligned.
	AlignComments bool
}
// wideGapRe matches a run of two or more whitespace characters. It is compiled
// once at package scope instead of once per inspected line.
var wideGapRe = regexp.MustCompile(`\s{2,}`)

// DetectFormattingStyle analyzes the given lines to determine the formatting style.
//
// Blank lines and lines starting with '#' are ignored. Every remaining line is
// classified as tab-separated (it contains a tab) or space-separated (it
// contains at least one run of two or more whitespace characters). Lines with
// only single-space gaps are counted in neither group.
//
// The returned style starts from the defaults UseTabs=true, SpacesPerTab=4,
// IPWidth=15, HostWidth=30, AlignComments=true. Only when space-separated lines
// strictly outnumber tab-separated lines is UseTabs cleared and SpacesPerTab
// derived from the observed gap widths:
//   - the greatest common divisor of all distinct gap widths, if it is in [2, 8];
//   - otherwise the gap width itself, if only one distinct width was seen;
//   - otherwise the default of 4.
func DetectFormattingStyle(lines []string) FormattingStyle {
	style := FormattingStyle{
		UseTabs:       true,
		SpacesPerTab:  4,
		IPWidth:       15,
		HostWidth:     30,
		AlignComments: true,
	}

	tabLines, spaceLines := 0, 0
	gapWidths := make(map[int]struct{}) // distinct widths of multi-space gaps

	for _, line := range lines {
		line = strings.TrimSpace(line)
		if line == "" || strings.HasPrefix(line, "#") {
			continue
		}
		if strings.Contains(line, "\t") {
			tabLines++
			continue
		}
		// The line is trimmed, so every whitespace run found here necessarily
		// sits between two non-space characters.
		gaps := wideGapRe.FindAllString(line, -1)
		if len(gaps) == 0 {
			continue
		}
		spaceLines++
		for _, gap := range gaps {
			gapWidths[len(gap)] = struct{}{}
		}
	}

	if spaceLines <= tabLines {
		return style
	}
	style.UseTabs = false

	// GCD is commutative and associative, so the random map iteration order
	// does not affect the result. findGCD(0, n) yields n, which seeds the fold.
	unit := 0
	for width := range gapWidths {
		unit = findGCD(unit, width)
	}
	// With a single distinct width, unit is that width and is used even when it
	// exceeds 8. In every other case SpacesPerTab keeps its default of 4.
	if (unit >= 2 && unit <= 8) || len(gapWidths) == 1 {
		style.SpacesPerTab = unit
	}
	return style
}
// findGCD returns the greatest common divisor of a and b using the Euclidean
// algorithm. When b is zero the result is a.
func findGCD(a, b int) int {
	if b == 0 {
		return a
	}
	return findGCD(b, a%b)
}
// ParseHostsFile opens the file at filepath, reads it line by line and hands
// the collected lines to ParseHostsContent, returning its results unchanged.
//
// An error is returned when the file cannot be opened or when reading fails;
// in both cases the other return values are nil.
func ParseHostsFile(filepath string) (*HostsFile, []ParseWarning, error) {
	src, openErr := os.Open(filepath)
	if openErr != nil {
		return nil, nil, fmt.Errorf("failed to open hosts file: %w", openErr)
	}
	defer src.Close()

	var collected []string
	lineScanner := bufio.NewScanner(src)
	for lineScanner.Scan() {
		collected = append(collected, lineScanner.Text())
	}
	// Scan returning false can mean end of input or a read failure; only Err
	// tells the two apart.
	if readErr := lineScanner.Err(); readErr != nil {
		return nil, nil, fmt.Errorf("failed to read hosts file: %w", readErr)
	}

	return ParseHostsContent(collected)
}
// ParseHostsContent parses hosts file content from a slice of lines.
//
// Each line is handled as follows:
//   - blank lines are skipped;
//   - lines whose first non-space character is '#' are offered to
//     parseCommentLine; if that yields an entry it is added to the file,
//     otherwise the non-empty comment text is appended to Comments;
//   - all other lines go through parseEntryLine.
//
// Any warning produced for a line is collected, whether or not the line also
// produced an entry. Warnings carry 1-based line numbers. The returned error
// is always nil; problems are reported through the warnings slice.
func ParseHostsContent(lines []string) (*HostsFile, []ParseWarning, error) {
	hostsFile := NewHostsFile()
	var warnings []ParseWarning

	for i, line := range lines {
		lineNum := i + 1 // warnings use 1-based line numbers

		trimmed := strings.TrimSpace(line)
		if trimmed == "" {
			continue
		}

		if strings.HasPrefix(trimmed, "#") {
			entry, warning := parseCommentLine(line, lineNum)
			if entry != nil {
				hostsFile.AddEntry(entry)
			} else if comment := parseStandaloneComment(line); comment != "" {
				hostsFile.Comments = append(hostsFile.Comments, comment)
			}
			// The warning must be recorded independently of entry: a rejected
			// disabled entry comes back as (nil, warning), and checking the
			// warning only when entry != nil would silently drop it.
			if warning != nil {
				warnings = append(warnings, *warning)
			}
			continue
		}

		entry, warning := parseEntryLine(line, lineNum)
		if entry != nil {
			hostsFile.AddEntry(entry)
		}
		if warning != nil {
			warnings = append(warnings, *warning)
		}
	}

	return hostsFile, warnings, nil
}
// parseCommentLine parses a commented line, which might be a disabled entry.
//
// The leading '#' is removed and the remainder is split at the next '#' into
// an entry part and an optional inline comment. The entry part is split on
// whitespace into an IP, a hostname and any aliases.
//
// Results:
//   - (nil, nil) when the line has fewer than two fields or the first field is
//     not a valid IP address, i.e. it is an ordinary comment;
//   - (nil, warning) when the IP is valid but the hostname fails
//     validateHostname;
//   - (entry, nil) otherwise, with Active=false and Original set to line.
//     Aliases that fail validateHostname are left out of the entry.
func parseCommentLine(line string, lineNum int) (*HostEntry, *ParseWarning) {
	// Remove the leading # and any whitespace
	content := strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(line), "#"))

	// Split by comment first to handle inline comments in disabled entries
	commentParts := strings.SplitN(content, "#", 2)
	var inlineComment string
	if len(commentParts) > 1 {
		inlineComment = strings.TrimSpace(commentParts[1])
	}

	// strings.Fields splits on runs of whitespace and never yields empty
	// fields, so no regexp has to be compiled per call and the fields need no
	// further trimming.
	fields := strings.Fields(commentParts[0])
	if len(fields) < 2 {
		return nil, nil // This is just a standalone comment
	}

	ip, hostname := fields[0], fields[1]
	if net.ParseIP(ip) == nil {
		return nil, nil // Not a valid IP, treat as standalone comment
	}
	if err := validateHostname(hostname); err != nil {
		return nil, &ParseWarning{
			Line:    lineNum,
			Message: fmt.Sprintf("invalid hostname in disabled entry: %v", err),
		}
	}

	entry := &HostEntry{
		IP:       ip,
		Hostname: hostname,
		Aliases:  make([]string, 0),
		Comment:  inlineComment,
		Active:   false, // Commented out = inactive
		Original: line,
	}
	for _, alias := range fields[2:] {
		if err := validateHostname(alias); err == nil {
			entry.Aliases = append(entry.Aliases, alias)
		}
	}
	return entry, nil
}
// parseStandaloneComment returns the text of a comment line: surrounding
// whitespace is dropped, one leading '#' is removed, and the remainder is
// trimmed again.
func parseStandaloneComment(line string) string {
	withoutMarker := strings.TrimPrefix(strings.TrimSpace(line), "#")
	return strings.TrimSpace(withoutMarker)
}
// parseEntryLine parses a regular (non-commented) entry line.
//
// Everything after the first '#' is taken as the inline comment. The text
// before it is split on whitespace into an IP, a hostname and any aliases.
//
// Results:
//   - (nil, warning) when the hostname is missing, the IP does not parse, or
//     the hostname fails validateHostname;
//   - (entry, nil) when the whole line is valid; the entry has Active=true and
//     Original set to line;
//   - (entry, warning) when the entry itself is valid but one or more aliases
//     fail validateHostname. Those aliases are left out of the entry and named
//     in the warning so that they are not lost without notice.
func parseEntryLine(line string, lineNum int) (*HostEntry, *ParseWarning) {
	// Split by comment first
	parts := strings.SplitN(line, "#", 2)
	var comment string
	if len(parts) > 1 {
		comment = strings.TrimSpace(parts[1])
	}

	// strings.Fields splits on runs of whitespace and never yields empty
	// fields, so no regexp has to be compiled per call and the fields need no
	// further trimming.
	fields := strings.Fields(parts[0])
	if len(fields) < 2 {
		return nil, &ParseWarning{
			Line:    lineNum,
			Message: "missing hostname",
		}
	}

	ip, hostname := fields[0], fields[1]
	if net.ParseIP(ip) == nil {
		return nil, &ParseWarning{
			Line:    lineNum,
			Message: fmt.Sprintf("invalid IP address: %s", ip),
		}
	}
	if err := validateHostname(hostname); err != nil {
		return nil, &ParseWarning{
			Line:    lineNum,
			Message: fmt.Sprintf("invalid hostname: %v", err),
		}
	}

	entry := &HostEntry{
		IP:       ip,
		Hostname: hostname,
		Aliases:  make([]string, 0),
		Comment:  comment,
		Active:   true,
		Original: line,
	}

	var rejected []string
	for _, alias := range fields[2:] {
		if err := validateHostname(alias); err != nil {
			rejected = append(rejected, alias)
			continue
		}
		entry.Aliases = append(entry.Aliases, alias)
	}
	if len(rejected) > 0 {
		return entry, &ParseWarning{
			Line:    lineNum,
			Message: fmt.Sprintf("ignored invalid aliases: %s", strings.Join(rejected, ", ")),
		}
	}
	return entry, nil
}
// FormatHostsFile renders hostsFile as a slice of output lines.
//
// All standalone comments come first, each prefixed with "# ". They are
// followed by one line per entry, produced by formatEntry using the column
// widths that calculateColumnWidths derives from the full entry list.
func FormatHostsFile(hostsFile *HostsFile) []string {
	var out []string

	// Header section: standalone comments in stored order.
	for i := range hostsFile.Comments {
		out = append(out, "# "+hostsFile.Comments[i])
	}

	// Entry section: widths are computed once so every row lines up.
	ipCol, hostCol := calculateColumnWidths(hostsFile.Entries)
	for i := range hostsFile.Entries {
		out = append(out, formatEntry(hostsFile.Entries[i], ipCol, hostCol))
	}

	return out
}
// calculateColumnWidths returns the IP and hostname column widths to use when
// aligning entries. Each width is the longest value in its column, but at
// least 10 for IPs and 15 for hostnames, plus 2 characters of padding.
// Lengths are measured in bytes.
func calculateColumnWidths(entries []*HostEntry) (int, int) {
	const (
		minIPWidth   = 10
		minHostWidth = 15
		padding      = 2
	)

	ipWidth, hostWidth := minIPWidth, minHostWidth
	for i := range entries {
		if n := len(entries[i].IP); ipWidth < n {
			ipWidth = n
		}
		if n := len(entries[i].Hostname); hostWidth < n {
			hostWidth = n
		}
	}
	return ipWidth + padding, hostWidth + padding
}
// formatEntry renders one entry as a single line.
//
// The IP and hostname are left-aligned and space-padded to ipWidth and
// hostWidth; all columns (IP, hostname, each alias) are joined by a tab. A
// non-empty comment is appended as "\t# <comment>", and an inactive entry has
// the whole line prefixed with "# ".
func formatEntry(entry *HostEntry, ipWidth, hostWidth int) string {
	var out strings.Builder

	// The disabled marker goes in front of everything else.
	if !entry.Active {
		out.WriteString("# ")
	}

	fmt.Fprintf(&out, "%-*s\t%-*s", ipWidth, entry.IP, hostWidth, entry.Hostname)

	for _, alias := range entry.Aliases {
		out.WriteByte('\t')
		out.WriteString(alias)
	}

	if entry.Comment != "" {
		out.WriteString("\t# ")
		out.WriteString(entry.Comment)
	}

	return out.String()
}
// WriteHostsFile writes a hosts file to the filesystem with intelligent formatting.
//
// Steps, in order:
//  1. BackupHostsFile is called for filepath; a failure aborts the write.
//  2. The content produced by FormatHostsFile is joined with "\n", terminated
//     with a final newline, written to "<filepath>.tmp" and synced.
//  3. The temporary file is renamed over filepath.
//
// The temporary file is created with the permission bits of the file being
// replaced (0644 if they cannot be read), so replacing the file does not
// change its mode. On any failure after step 1 the temporary file is removed.
func WriteHostsFile(filepath string, hostsFile *HostsFile) error {
	// Create backup before writing
	if _, err := BackupHostsFile(filepath); err != nil {
		return fmt.Errorf("failed to create backup: %w", err)
	}

	// Format the content
	lines := FormatHostsFile(hostsFile)
	content := strings.Join(lines, "\n") + "\n"

	// The rename below swaps in a brand-new file, so carry the existing
	// permission bits over explicitly.
	mode := os.FileMode(0644)
	if info, err := os.Stat(filepath); err == nil {
		mode = info.Mode().Perm()
	}

	// Write to temporary file first for atomic operation
	tmpPath := filepath + ".tmp"
	if err := writeFileSynced(tmpPath, []byte(content), mode); err != nil {
		// Best-effort cleanup of a partial file; the write error is the one
		// worth reporting.
		os.Remove(tmpPath)
		return fmt.Errorf("failed to write temporary file: %w", err)
	}

	// Atomic move
	if err := os.Rename(tmpPath, filepath); err != nil {
		os.Remove(tmpPath) // best-effort cleanup; the rename error is reported
		return fmt.Errorf("failed to replace hosts file: %w", err)
	}
	return nil
}

// writeFileSynced creates or truncates path, writes data, sets the file mode
// to mode and syncs the file before closing it.
func writeFileSynced(path string, data []byte, mode os.FileMode) error {
	f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, mode)
	if err != nil {
		return err
	}
	if _, err := f.Write(data); err != nil {
		f.Close()
		return err
	}
	// OpenFile applies mode only when it creates the file, and the umask may
	// clear bits; Chmod makes the result exact.
	if err := f.Chmod(mode); err != nil {
		f.Close()
		return err
	}
	if err := f.Sync(); err != nil {
		f.Close()
		return err
	}
	return f.Close()
}
// BackupHostsFile copies the file at hostsPath into the user's
// ~/.config/hosts-go directory, creating that directory if needed, and returns
// the path of the copy. The copy is named "hosts.backup.<timestamp>" with the
// timestamp formatted as YYYYMMDD-HHMMSS.
//
// On failure the returned path is empty and the error says which step failed.
func BackupHostsFile(hostsPath string) (string, error) {
	// Locate and prepare the backup directory.
	home, homeErr := os.UserHomeDir()
	if homeErr != nil {
		return "", fmt.Errorf("failed to get home directory: %w", homeErr)
	}
	backupDir := filepath.Join(home, ".config", "hosts-go")
	if mkErr := os.MkdirAll(backupDir, 0755); mkErr != nil {
		return "", fmt.Errorf("failed to create config directory: %w", mkErr)
	}

	// The destination name embeds the current time.
	stamp := time.Now().Format("20060102-150405")
	dest := filepath.Join(backupDir, fmt.Sprintf("hosts.backup.%s", stamp))

	// Copy the whole file in one read and one write.
	data, readErr := os.ReadFile(hostsPath)
	if readErr != nil {
		return "", fmt.Errorf("failed to read original hosts file: %w", readErr)
	}
	if writeErr := os.WriteFile(dest, data, 0644); writeErr != nil {
		return "", fmt.Errorf("failed to write backup file: %w", writeErr)
	}

	return dest, nil
}