package main
import (
"./fileio"
"fmt"
"os"
"strconv"
"strings"
)
// Morph is a single morpheme read from one tab-separated analysis line.
type Morph struct {
	// surface is the token as it appears in the text; base is its
	// dictionary (base) form.
	surface, base string
	// pos is the part of speech and pos1 its first-level subcategory.
	pos, pos1 string
}
// Chunk is one phrase of a sentence together with its dependency links.
type Chunk struct {
// morphs holds the morphemes that make up the chunk, in input order.
morphs []Morph
// dst is the index (within the sentence) of the chunk this one depends on;
// it is negative when the chunk has no dependency target.
dst int
// srcs lists the indices of the chunks whose dst points at this chunk.
srcs []int
}
// parseArticle converts dependency-analysis output lines into sentences, each
// a slice of chunks with their dependency links resolved.
//
// Three kinds of lines are recognised:
//   - "EOS" ends the current sentence;
//   - "* <index> <dst>D <head>/<func> <score>" starts a new chunk whose
//     dependency target is <dst> (-1 when it has none);
//   - "<surface>\t<pos>,<pos1>,...,<base>,..." adds one morpheme to the
//     current chunk.
//
// Blank lines are ignored. Chunks without morphemes and sentences without
// chunks are dropped. A sentence that is not terminated by "EOS" before the
// input ends is still returned. The function panics when a chunk header or a
// morpheme line is malformed.
func parseArticle(lines []string) [][]Chunk {
	article := make([][]Chunk, 0)
	var (
		sentence  []Chunk
		morphemes []Morph
	)
	// dst starts at -1 so that morphemes appearing before any chunk header
	// form a chunk with no dependency target instead of pointing at chunk 0.
	chunk := Chunk{dst: -1}

	// flushChunk moves the pending morphemes into the sentence as one chunk.
	flushChunk := func() {
		if len(morphemes) == 0 {
			return
		}
		chunk.morphs = morphemes
		sentence = append(sentence, chunk)
		morphemes = nil
	}
	// flushSentence closes the pending chunk and sentence and resets the
	// parser state for the next sentence.
	flushSentence := func() {
		flushChunk()
		if len(sentence) > 0 {
			initSourceIndex(sentence)
			article = append(article, sentence)
			sentence = nil
		}
		chunk = Chunk{dst: -1}
	}

	for _, line := range lines {
		switch {
		case line == "":
			// Nothing to parse on a blank line.
		case line == "EOS":
			flushSentence()
		case strings.HasPrefix(line, "* "):
			// Requiring the space keeps a morpheme whose surface form is
			// "*" (written as "*\t...") from being taken for a header.
			flushChunk()
			chunk = initChunk(chunk, parseChunkDst(line))
		default:
			morphemes = append(morphemes, parseMorph(line))
		}
	}
	// Keep a trailing sentence even if the final "EOS" is missing.
	flushSentence()
	return article
}

// parseChunkDst extracts the dependency target from a chunk header of the form
//
//	* chunk-index target-index(-1 when none) head-morpheme/function-morpheme score
//
// It panics when the header has too few fields or the target is not an integer.
func parseChunkDst(header string) int {
	fields := strings.Fields(header)
	if len(fields) < 3 {
		panic(fmt.Errorf("malformed chunk header: %q", header))
	}
	// The third field is the target index followed by a one-character
	// relation type such as "D"; drop that last character.
	target := fields[2]
	dst, err := strconv.Atoi(target[:len(target)-1])
	if err != nil {
		panic(err)
	}
	return dst
}

// parseMorph builds a Morph from a morpheme line of the form
//
//	surface\tPOS,POS-sub1,POS-sub2,POS-sub3,conjugation-form,conjugation-type,base,reading,pronunciation
//
// Features missing from a short feature list are left empty. It panics when
// the line has no tab-separated feature column.
func parseMorph(line string) Morph {
	cols := strings.Split(line, "\t")
	if len(cols) < 2 {
		panic(fmt.Errorf("malformed morpheme line (no tab-separated features): %q", line))
	}
	features := strings.Split(cols[1], ",")
	feature := func(i int) string {
		if i < len(features) {
			return features[i]
		}
		return ""
	}
	return Morph{
		surface: cols[0],
		base:    feature(6),
		pos:     feature(0),
		pos1:    feature(1),
	}
}
// initChunk returns chunk reset for a new phrase: dst is set to the given
// dependency target and both morphs and srcs become fresh empty slices.
func initChunk(chunk Chunk, dst int) Chunk {
	chunk.dst = dst
	chunk.morphs, chunk.srcs = make([]Morph, 0), make([]int, 0)
	return chunk
}
// initSourceIndex fills in the reverse dependency links of a sentence: for
// every chunk i with a valid target dst, i is appended to sentence[dst].srcs.
//
// Chunks whose dst is negative (no target) or not a valid index into sentence
// are skipped, so a dangling target cannot cause an out-of-range panic.
func initSourceIndex(sentence []Chunk) {
	for i := range sentence {
		dst := sentence[i].dst
		if dst < 0 || dst >= len(sentence) {
			continue
		}
		sentence[dst].srcs = append(sentence[dst].srcs, i)
	}
}
// containNoun reports whether at least one morpheme in morphs has the noun
// part of speech.
func containNoun(morphs []Morph) bool {
	const noun = "名詞"
	for i := range morphs {
		if morphs[i].pos == noun {
			return true
		}
	}
	return false
}
// joinPhrase concatenates the surface forms of morphs in order, leaving out
// every morpheme whose part of speech is the symbol class.
func joinPhrase(morphs []Morph) string {
	var text strings.Builder
	for i := range morphs {
		if morphs[i].pos != "記号" {
			text.WriteString(morphs[i].surface)
		}
	}
	return text.String()
}
// joinArrow follows the dependency chain that starts at chunk's target and
// returns the phrases along it, each followed by " -> " (so a non-empty result
// always ends with " -> "). It returns "" when chunk has no valid target.
//
// The walk stops at the first chunk whose dst is negative or outside sentence,
// and after at most len(sentence) steps so that a cyclic chain cannot loop
// forever.
func joinArrow(sentence []Chunk, chunk Chunk) string {
	var path strings.Builder
	dst := chunk.dst
	for hops := 0; hops < len(sentence) && dst >= 0 && dst < len(sentence); hops++ {
		path.WriteString(joinPhrase(sentence[dst].morphs))
		path.WriteString(" -> ")
		dst = sentence[dst].dst
	}
	return path.String()
}
// printNounToRoot writes to standard output, for every chunk of sentence that
// contains a noun and has a dependency chain, one line of the form
// "phrase -> phrase -> ... -> phrase" from that chunk along the chain.
func printNounToRoot(sentence []Chunk) {
	for _, chunk := range sentence {
		if !containNoun(chunk.morphs) {
			continue
		}
		path := joinArrow(sentence, chunk)
		if path == "" {
			// No dependency target: there is no path to print.
			continue
		}
		// Remove only the single trailing separator. TrimRight would treat
		// " -> " as a character set and also strip any ' ', '-' or '>' that
		// ends the last phrase itself.
		path = strings.TrimSuffix(path, " -> ")
		fmt.Printf("%s -> %s\n", joinPhrase(chunk.morphs), path)
	}
}
// main expects exactly one command-line argument, the path of the analysis
// file. It parses that file and prints the noun-to-root paths of every
// sentence; with any other argument count it prints a usage line and exits
// with status 1.
func main() {
	args := os.Args
	if len(args) != 2 {
		fmt.Println("Usage: main <filepath>")
		os.Exit(1)
	}

	input := fileio.ReadFileAllLines(args[1])
	for _, sentence := range parseArticle(input) {
		printNounToRoot(sentence)
	}
}
package fileio
import (
"bufio"
"fmt"
"io"
"os"
)
// WriteFileAllLines writes lines to fileName, one per line, each terminated by
// a newline. The file is created if necessary and any previous content is
// discarded. It panics if the file cannot be opened or written.
func WriteFileAllLines(fileName string, lines []string) {
	// O_TRUNC is required: without it, overwriting a longer existing file
	// would leave the tail of the old content after the new lines.
	file, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0666)
	if err != nil {
		panic(err)
	}
	defer file.Close()
	w := bufio.NewWriter(file)
	for _, line := range lines {
		fmt.Fprintln(w, line)
	}
	// The buffered writer remembers the first write error, so checking Flush
	// covers every Fprintln above as well.
	if err := w.Flush(); err != nil {
		panic(err)
	}
}
// WriteFileString writes str to fileName, creating the file or truncating an
// existing one. It panics if the file cannot be created or written.
func WriteFileString(fileName string, str string) {
	file, err := os.Create(fileName)
	if err != nil {
		panic(err)
	}
	defer file.Close()
	// A failed or short write must not pass silently.
	if _, err := file.WriteString(str); err != nil {
		panic(err)
	}
}
// ReadAllLine reads r to the end and returns its lines without line
// terminators. The result is an empty, non-nil slice when r yields no lines.
//
// Lines of up to 64 MiB are accepted. It panics if reading fails or a line
// exceeds that limit, rather than silently returning a truncated result.
func ReadAllLine(r io.Reader) []string {
	const maxLineBytes = 64 * 1024 * 1024

	sc := bufio.NewScanner(r)
	// The scanner's default limit is bufio.MaxScanTokenSize (64 KiB) per
	// line; start at that size and let the buffer grow up to maxLineBytes.
	sc.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)

	lines := []string{}
	for sc.Scan() {
		lines = append(lines, sc.Text())
	}
	// Scan returns false both at EOF and on error; only Err tells them apart.
	if err := sc.Err(); err != nil {
		panic(err)
	}
	return lines
}
// ReadFileAllLines opens fileName and returns its lines as produced by
// ReadAllLine. It panics if the file cannot be opened.
func ReadFileAllLines(fileName string) []string {
	file, openErr := os.Open(fileName)
	if openErr != nil {
		panic(openErr)
	}
	defer file.Close()

	return ReadAllLine(file)
}
#!/bin/bash
# Puts fileio.go into its own package directory, builds main.go and runs the
# resulting binary on input.txt.

# Stop at the first failing step instead of running a stale or missing binary.
set -euo pipefail

# -p keeps a rerun from failing when the directory already exists.
mkdir -p fileio

# After the first run fileio.go has already been moved, so only move it when
# it is still in the working directory.
if [ -f fileio.go ]; then
	mv fileio.go fileio/
fi

go build main.go
./main input.txt
* 0 5D 0/1 -1.514009
吾輩 名詞,代名詞,一般,*,*,*,吾輩,ワガハイ,ワガハイ
は 助詞,係助詞,*,*,*,*,は,ハ,ワ
* 1 2D 0/1 1.311423
ここ 名詞,代名詞,一般,*,*,*,ここ,ココ,ココ
で 助詞,格助詞,一般,*,*,*,で,デ,デ
* 2 3D 0/1 0.123057
始め 動詞,自立,*,*,一段,連用形,始める,ハジメ,ハジメ
て 助詞,接続助詞,*,*,*,*,て,テ,テ
* 3 4D 0/1 1.440044
人間 名詞,一般,*,*,*,*,人間,ニンゲン,ニンゲン
という 助詞,格助詞,連語,*,*,*,という,トイウ,トユウ
* 4 5D 0/1 -1.514009
もの 名詞,非自立,一般,*,*,*,もの,モノ,モノ
を 助詞,格助詞,一般,*,*,*,を,ヲ,ヲ
* 5 -1D 0/1 0.000000
見 動詞,自立,*,*,一段,連用形,見る,ミ,ミ
た 助動詞,*,*,*,特殊・タ,基本形,た,タ,タ
。 記号,句点,*,*,*,*,。,。,。
EOS