package main
import (
"./fileio"
"fmt"
"os"
"strconv"
"strings"
)
type (
	// Morph is one morpheme read from a morpheme line of the parser output.
	Morph struct {
		surface, base string // surface: text before the tab; base: base-form feature
		pos, pos1     string // pos: part of speech; pos1: its first subcategory
	}

	// Chunk is one phrase of a sentence together with its dependency links.
	Chunk struct {
		morphs []Morph // morphemes that make up the chunk, in input order
		dst    int     // index of the chunk this one depends on; negative when there is none
		srcs   []int   // indices of the chunks in the same sentence whose dst is this chunk
	}
)
// parseArticle groups dependency-parser output lines into sentences of chunks.
//
// Three kinds of line are recognised:
//   - "EOS" closes the current sentence;
//   - a line starting with "* " is a chunk header whose third field is the
//     destination chunk index followed by a suffix such as "D" ("2D", "-1D");
//   - any other non-empty line is a morpheme: surface form, a tab, then
//     comma-separated features.
//
// Blank lines are ignored, and a sentence that is not closed by a final "EOS"
// is still returned. Each completed sentence is passed to initSourceIndex so
// the chunks' srcs are filled in. The result is never nil. The function
// panics with a descriptive error on a malformed header or morpheme line.
func parseArticle(lines []string) [][]Chunk {
	article := make([][]Chunk, 0)
	sentence := make([]Chunk, 0)
	morphemes := make([]Morph, 0)
	// Morphemes that appear before any header go into a chunk with no
	// destination instead of inheriting a stale or zero-valued one.
	chunk := initChunk(Chunk{}, -1)

	// closeChunk appends the chunk under construction to the sentence; a
	// chunk that collected no morphemes is dropped.
	closeChunk := func() {
		if len(morphemes) == 0 {
			return
		}
		chunk.morphs = morphemes
		sentence = append(sentence, chunk)
		morphemes = make([]Morph, 0)
	}
	// closeSentence finishes the pending chunk and, if the sentence is not
	// empty, links its chunks and appends it to the article.
	closeSentence := func() {
		closeChunk()
		if len(sentence) == 0 {
			return
		}
		initSourceIndex(sentence)
		article = append(article, sentence)
		sentence = make([]Chunk, 0)
	}

	for _, line := range lines {
		switch {
		case line == "":
			// Nothing to parse; skipping also avoids indexing an empty string.
		case line == "EOS":
			closeSentence()
			chunk = initChunk(chunk, -1)
		case strings.HasPrefix(line, "* "):
			// Requiring the space keeps a morpheme whose surface is "*"
			// ("*\t...") from being mistaken for a header.
			closeChunk()
			chunk = initChunk(chunk, parseChunkDst(line))
		default:
			morphemes = append(morphemes, parseMorph(line))
		}
	}
	// Keep a trailing sentence even when the input lacks its final "EOS".
	closeSentence()
	return article
}

// parseChunkDst extracts the destination chunk index from a header line.
//
// Header layout:
// * chunk-index destination-index(-1 when there is none) head-morpheme-index/function-word-morpheme-index dependency-score
func parseChunkDst(header string) int {
	fields := strings.Fields(header)
	if len(fields) < 3 {
		panic(fmt.Errorf("parseArticle: chunk header %q has fewer than 3 fields", header))
	}
	// Drop the trailing non-digit suffix (e.g. the "D" in "2D").
	number := strings.TrimRightFunc(fields[2], func(r rune) bool {
		return r < '0' || r > '9'
	})
	dst, err := strconv.Atoi(number)
	if err != nil {
		panic(fmt.Errorf("parseArticle: chunk header %q: %w", header, err))
	}
	return dst
}

// parseMorph builds a Morph from a morpheme line.
//
// Line layout:
// surface\tPOS,POS-subcategory-1,POS-subcategory-2,POS-subcategory-3,conjugation-form,conjugation-type,base-form,reading,pronunciation
func parseMorph(line string) Morph {
	cols := strings.Split(line, "\t")
	if len(cols) < 2 {
		panic(fmt.Errorf("parseArticle: morpheme line %q has no tab-separated feature column", line))
	}
	features := strings.Split(cols[1], ",")
	if len(features) < 7 {
		panic(fmt.Errorf("parseArticle: morpheme line %q has %d features, want at least 7", line, len(features)))
	}
	return Morph{
		surface: cols[0],
		base:    features[6],
		pos:     features[0],
		pos1:    features[1],
	}
}
// initChunk returns a copy of chunk whose destination is dst and whose
// morpheme and source lists are fresh, empty, non-nil slices. The caller's
// value is not modified because chunk is passed by value.
func initChunk(chunk Chunk, dst int) Chunk {
	fresh := chunk
	fresh.dst = dst
	fresh.morphs, fresh.srcs = []Morph{}, []int{}
	return fresh
}
// initSourceIndex records, for every chunk of sentence, the indices of the
// chunks that depend on it: chunk i is appended to the srcs of the chunk at
// index sentence[i].dst. The slice elements are modified in place.
//
// A chunk whose dst is negative (no destination) or does not name a chunk of
// this sentence is skipped, so an out-of-range destination in the input
// cannot cause an index panic.
func initSourceIndex(sentence []Chunk) {
	for i := range sentence {
		dst := sentence[i].dst
		if dst < 0 || dst >= len(sentence) {
			continue
		}
		sentence[dst].srcs = append(sentence[dst].srcs, i)
	}
}
// printSentence writes one line per chunk to standard output: the chunk's
// text, and, when the chunk has a destination inside the sentence, a tab
// followed by the destination chunk's text. Morphemes whose pos is "記号"
// are left out of the text.
//
// A dst that is negative or does not name a chunk of this sentence is
// treated as "no destination" so malformed input cannot cause an index panic.
func printSentence(sentence []Chunk) {
	for _, chunk := range sentence {
		// Build the whole line first so each chunk costs a single write.
		var line strings.Builder
		appendChunkText(&line, chunk)
		if chunk.dst >= 0 && chunk.dst < len(sentence) {
			line.WriteByte('\t')
			appendChunkText(&line, sentence[chunk.dst])
		}
		fmt.Println(line.String())
	}
}

// appendChunkText appends the surface forms of chunk's morphemes to b,
// skipping morphemes whose pos is "記号".
func appendChunkText(b *strings.Builder, chunk Chunk) {
	for _, morph := range chunk.morphs {
		if morph.pos == "記号" {
			continue
		}
		b.WriteString(morph.surface)
	}
}
// printAllSentence prints every sentence of article, in order, by handing
// each one to printSentence.
func printAllSentence(article [][]Chunk) {
	for i := range article {
		printSentence(article[i])
	}
}
func main() {
if len(os.Args) != 2 {
fmt.Println("Usage: main <filepath>")
os.Exit(1)
}
lines := fileio.ReadFileAllLines(os.Args[1])
article := parseArticle(lines)
printAllSentence(article)
}
package fileio
import (
"bufio"
"fmt"
"io"
"os"
)
// WriteFileAllLines writes lines to the file named fileName, one per line,
// each followed by a newline. The file is created if it does not exist and
// truncated if it does, so the result contains exactly the given lines.
//
// It panics if the file cannot be opened or if writing, flushing or closing
// fails.
func WriteFileAllLines(fileName string, lines []string) {
	// O_TRUNC discards previous content; without it, writing fewer bytes
	// than the file already holds leaves stale data after the new lines.
	file, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		panic(err)
	}
	// Safety net for the panic paths below; the explicit Close at the end
	// reports the error on the normal path.
	defer file.Close()

	w := bufio.NewWriter(file)
	for _, line := range lines {
		// Write errors are sticky in bufio.Writer and surface from Flush.
		fmt.Fprintln(w, line)
	}
	if err := w.Flush(); err != nil {
		panic(err)
	}
	if err := file.Close(); err != nil {
		panic(err)
	}
}
// ReadAllLine reads r to the end and returns its lines without their line
// terminators. The result is an empty, non-nil slice when r has no data.
//
// It panics if reading fails or if a single line exceeds the 16 MiB limit
// set below, instead of silently returning only the lines read so far.
func ReadAllLine(r io.Reader) []string {
	// Upper bound for one line; bufio.Scanner's default is 64 KiB.
	const maxLineBytes = 16 << 20

	sc := bufio.NewScanner(r)
	// The buffer starts at the default size and grows only when needed.
	sc.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)

	lines := []string{}
	for sc.Scan() {
		lines = append(lines, sc.Text())
	}
	// Scan returns false both at EOF and on failure; Err tells them apart.
	if err := sc.Err(); err != nil {
		panic(err)
	}
	return lines
}
// ReadFileAllLines opens the file named fileName and returns its lines as
// produced by ReadAllLine. The file is closed before the function returns.
// It panics if the file cannot be opened.
func ReadFileAllLines(fileName string) []string {
	file, openErr := os.Open(fileName)
	if openErr != nil {
		panic(openErr)
	}
	defer file.Close()

	return ReadAllLine(file)
}
#!/bin/bash
# Moves fileio.go into its own package directory and runs main.go.
# Any arguments given to this script are passed on to the program, which
# needs the input file path as its single argument:
#   <this script> <filepath>

# Stop at the first failing step instead of running the program half set up.
set -e

# -p keeps the script re-runnable when the directory already exists.
mkdir -p fileio

# On a second run the file has already been moved. The trailing slash makes
# mv fail rather than rename fileio.go if the directory is missing.
if [ -f fileio.go ]; then
  mv fileio.go fileio/
fi

# Without a forwarded file path main.go only prints its usage message.
go run main.go "$@"