package main
import (
"./fileio"
"fmt"
"os"
"sort"
"strconv"
"strings"
)
// Morph holds the four attributes kept for one morpheme of the analysed text.
type Morph struct {
	surface, base string // surface form as written, and the dictionary (base) form
	pos, pos1     string // part of speech and its first subcategory
}
// Chunk is one phrase unit of a sentence together with its dependency links.
type Chunk struct {
	// morphs lists the morphemes that make up the chunk, in input order.
	morphs []Morph
	// dst is the index (within the sentence) of the chunk this one depends on;
	// a negative value means the chunk has no head.
	dst int
	// srcs holds the indices of the chunks whose dst points at this chunk.
	srcs []int
}
// wordSet pairs the text of a dependent chunk with the particles found in it,
// so that the pairs can be sorted by particle before printing.
type wordSet struct {
	phrase, particle string
}
// parseArticle converts the lines of a dependency-parsed text into sentences,
// each sentence being the ordered list of its chunks.
//
// Three kinds of lines are recognised:
//   - "EOS" closes the current sentence,
//   - a line starting with "* " opens a new chunk,
//   - any other non-empty line describes one morpheme of the current chunk.
//
// Empty lines are ignored. A chunk that received no morphemes is not emitted,
// and chunks that are not followed by an "EOS" line are dropped. The function
// panics when a header or morpheme line does not have the expected fields.
func parseArticle(lines []string) [][]Chunk {
	article := make([][]Chunk, 0)
	sentence := make([]Chunk, 0)
	morphemes := make([]Morph, 0)
	var chunk Chunk

	// flushChunk attaches the morphemes gathered so far to the current chunk
	// and appends it to the sentence; it does nothing when there are none.
	flushChunk := func() {
		if len(morphemes) == 0 {
			return
		}
		chunk.morphs = morphemes
		sentence = append(sentence, chunk)
		morphemes = make([]Morph, 0)
	}

	for _, line := range lines {
		switch {
		case line == "":
			// Blank lines carry no information; skipping them also avoids
			// indexing into an empty string.
		case line == "EOS":
			flushChunk()
			if len(sentence) > 0 {
				// All chunks of the sentence are known now, so the reverse
				// links (srcs) can be filled in.
				initSourceIndex(sentence)
				article = append(article, sentence)
				sentence = make([]Chunk, 0)
			}
		case strings.HasPrefix(line, "* "):
			// Require "* " rather than just '*' so that a morpheme whose
			// surface form is "*" (followed by a tab) is not mistaken for a
			// chunk header.
			flushChunk()
			chunk = initChunk(chunk, parseHeadIndex(line))
		default:
			morphemes = append(morphemes, parseMorph(line))
		}
	}
	return article
}

// parseHeadIndex returns the head chunk index stored in a chunk header line.
//
// Header layout (space separated):
//
//	* chunk-index head-index(-1 when there is no head)+type-letter head-morpheme/function-morpheme score
func parseHeadIndex(line string) int {
	fields := strings.Split(line, " ")
	if len(fields) < 3 || len(fields[2]) < 2 {
		panic("malformed chunk header: " + line)
	}
	// The third field looks like "3D" or "-1D"; the trailing letter is not
	// part of the number.
	head := fields[2]
	dst, err := strconv.Atoi(head[:len(head)-1])
	if err != nil {
		panic(err)
	}
	return dst
}

// parseMorph builds a Morph from one morpheme line.
//
// Line layout:
//
//	surface\tPOS,POS-sub1,POS-sub2,POS-sub3,conjugation-type,conjugation-form,base-form,reading,pronunciation
func parseMorph(line string) Morph {
	cols := strings.Split(line, "\t")
	if len(cols) < 2 {
		panic("malformed morpheme line (no tab): " + line)
	}
	features := strings.Split(cols[1], ",")
	if len(features) < 7 {
		panic("malformed morpheme line (too few features): " + line)
	}
	return Morph{
		surface: cols[0],
		base:    features[6],
		pos:     features[0],
		pos1:    features[1],
	}
}
// initChunk returns a fresh Chunk whose head index is dst and whose morpheme
// and source lists are empty (non-nil). Every field of the incoming chunk is
// replaced, so nothing of its previous contents survives.
func initChunk(chunk Chunk, dst int) Chunk {
	return Chunk{
		morphs: make([]Morph, 0),
		dst:    dst,
		srcs:   make([]int, 0),
	}
}
// initSourceIndex fills in the reverse dependency links of a sentence: for
// every chunk that has a head (dst >= 0), its own index is appended to the
// head chunk's srcs. The slice elements are modified in place.
func initSourceIndex(sentence []Chunk) {
	for idx := range sentence {
		head := sentence[idx].dst
		if head >= 0 {
			sentence[head].srcs = append(sentence[head].srcs, idx)
		}
	}
}
// containVerb reports whether at least one morpheme has the part of speech
// "動詞" (verb).
func containVerb(morphs []Morph) bool {
	for i := 0; i < len(morphs); i++ {
		if morphs[i].pos == "動詞" {
			return true
		}
	}
	return false
}
// containParticle reports whether at least one morpheme has the part of
// speech "助詞" (particle).
func containParticle(morphs []Morph) bool {
	found := false
	// Stop scanning as soon as the first particle has been seen.
	for i := 0; i < len(morphs) && !found; i++ {
		found = morphs[i].pos == "助詞"
	}
	return found
}
// containParticleWo reports whether the morphemes include the particle "を",
// i.e. a morpheme whose part of speech is "助詞" and whose surface is "を".
func containParticleWo(morphs []Morph) bool {
	for _, m := range morphs {
		if m.surface != "を" || m.pos != "助詞" {
			continue
		}
		return true
	}
	return false
}
// fetchVerb returns the base form of the first morpheme whose part of speech
// is "動詞" (verb), or "" when the list contains no verb.
func fetchVerb(morphs []Morph) string {
	for i := range morphs {
		if m := morphs[i]; m.pos == "動詞" {
			return m.base
		}
	}
	return ""
}
// fetchParticleAll returns the base forms of all particles ("助詞") in morphs,
// in order, separated by single spaces. Trailing spaces are stripped from the
// result; it is "" when there is no particle.
func fetchParticleAll(morphs []Morph) string {
	var bases []string
	for i := range morphs {
		if m := morphs[i]; m.pos == "助詞" {
			bases = append(bases, m.base)
		}
	}
	return strings.TrimRight(strings.Join(bases, " "), " ")
}
// fetchSahenSetsuzokuNounWo looks for a noun ("名詞") of subcategory
// "サ変接続" that is immediately followed by a morpheme with surface "を".
// It returns the noun's surface with "を" appended for the first such pair,
// or "" when there is none.
func fetchSahenSetsuzokuNounWo(morphs []Morph) string {
	// The last morpheme has no successor, so it can never start a match.
	for i := 0; i+1 < len(morphs); i++ {
		noun, next := morphs[i], morphs[i+1]
		if noun.pos == "名詞" && noun.pos1 == "サ変接続" && next.surface == "を" {
			return noun.surface + "を"
		}
	}
	return ""
}
// joinPhrase concatenates the surface forms of the morphemes, leaving out
// those whose part of speech is "記号" (symbol).
func joinPhrase(morphs []Morph) string {
	surfaces := make([]string, 0, len(morphs))
	for i := range morphs {
		if morphs[i].pos != "記号" {
			surfaces = append(surfaces, morphs[i].surface)
		}
	}
	return strings.Join(surfaces, "")
}
// printVerbCases prints, for one sentence, every predicate of the form
// "<sahen noun>を<verb base form>" together with its other dependents.
//
// For each chunk that contains a verb, every source chunk holding a sahen
// noun followed by "を" yields one predicate. The remaining source chunks
// that contain a particle are sorted (stably) by their particle string and
// written as one tab-separated line to standard output:
//
//	predicate \t particles (space separated) \t phrases (space separated)
//
// Nothing is printed for a predicate without such dependents.
func printVerbCases(sentence []Chunk) {
	for _, verbChunk := range sentence {
		if !containVerb(verbChunk.morphs) {
			continue
		}
		for _, nounIdx := range verbChunk.srcs {
			nounWo := fetchSahenSetsuzokuNounWo(sentence[nounIdx].morphs)
			if nounWo == "" {
				continue
			}
			predicate := nounWo + fetchVerb(verbChunk.morphs)

			// Gather the other dependents of the verb that carry a particle.
			var cases []wordSet
			for _, srcIdx := range verbChunk.srcs {
				src := sentence[srcIdx].morphs
				if srcIdx == nounIdx || !containParticle(src) {
					continue
				}
				cases = append(cases, wordSet{
					phrase:   joinPhrase(src),
					particle: fetchParticleAll(src),
				})
			}
			if len(cases) == 0 {
				continue
			}

			sort.SliceStable(cases, func(a, b int) bool {
				return cases[a].particle < cases[b].particle
			})

			particles := make([]string, len(cases))
			phrases := make([]string, len(cases))
			for k, c := range cases {
				particles[k] = c.particle
				phrases[k] = c.phrase
			}
			fmt.Printf("%s\t%s\t%s\n",
				predicate,
				strings.TrimRight(strings.Join(particles, " "), " "),
				strings.TrimRight(strings.Join(phrases, " "), " "))
		}
	}
}
func main() {
if len(os.Args) != 2 {
fmt.Println("Usage: main <filepath>")
os.Exit(1)
}
lines := fileio.ReadFileAllLines(os.Args[1])
article := parseArticle(lines)
for _, sentence := range article {
printVerbCases(sentence)
}
}
package fileio
import (
"bufio"
"fmt"
"io"
"os"
)
func WriteFileAllLines(fileName string, lines []string) {
file, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE, 0666)
if err != nil {
panic(err)
}
defer file.Close()
w := bufio.NewWriter(file)
for _, line := range lines {
fmt.Fprintln(w, line)
}
w.Flush()
}
func WriteFileString(fileName string, str string) {
file, err := os.Create(fileName)
if err != nil {
panic(err)
}
defer file.Close()
file.Write([]byte(str))
}
func ReadAllLine(r io.Reader) []string {
sc := bufio.NewScanner(r)
lines := []string{}
for sc.Scan() {
lines = append(lines, sc.Text())
}
return lines
}
// ReadFileAllLines opens fileName and returns its contents as a slice of
// lines, as produced by ReadAllLine. It panics when the file cannot be opened.
func ReadFileAllLines(fileName string) []string {
	f, openErr := os.Open(fileName)
	if openErr != nil {
		panic(openErr)
	}
	defer f.Close()
	return ReadAllLine(f)
}
#!/bin/bash
# Builds main.go together with its local fileio package and runs the result
# on input.txt.
#
# NOTE(review): main.go imports the package by the relative path "./fileio";
# relative imports are presumably only accepted outside module mode — confirm
# against the Go toolchain in use.

# Abort at the first failing step so that a failed build never runs a stale
# ./main binary.
set -e

# The package source has to live in a fileio/ directory next to main.go.
# -p keeps a rerun from failing when the directory already exists.
mkdir -p fileio

# On a rerun the file has already been moved; only move it when it is still
# in the current directory.
if [ -f fileio.go ]; then
  mv fileio.go fileio/
fi

go build main.go
./main input.txt
* 0 3D 0/0 0.455238
記号,空白,*,*,*,*, , ,
* 1 2D 0/0 1.021216
別段 副詞,助詞類接続,*,*,*,*,別段,ベツダン,ベツダン
* 2 3D 0/2 2.294919
くる 動詞,自立,*,*,カ変・クル,基本形,くる,クル,クル
に 助詞,格助詞,一般,*,*,*,に,ニ,ニ
も 助詞,係助詞,*,*,*,*,も,モ,モ
* 3 7D 0/3 -1.816825
及ば 動詞,自立,*,*,五段・バ行,未然形,及ぶ,オヨバ,オヨバ
ん 助動詞,*,*,*,不変化型,基本形,ん,ン,ン
さ 助詞,終助詞,*,*,*,*,さ,サ,サ
と 助詞,格助詞,引用,*,*,*,と,ト,ト
、 記号,読点,*,*,*,*,、,、,、
* 4 7D 0/1 -1.816825
主人 名詞,一般,*,*,*,*,主人,シュジン,シュジン
は 助詞,係助詞,*,*,*,*,は,ハ,ワ
* 5 7D 0/1 -1.816825
手紙 名詞,一般,*,*,*,*,手紙,テガミ,テガミ
に 助詞,格助詞,一般,*,*,*,に,ニ,ニ
* 6 7D 0/1 -1.816825
返事 名詞,サ変接続,*,*,*,*,返事,ヘンジ,ヘンジ
を 助詞,格助詞,一般,*,*,*,を,ヲ,ヲ
* 7 -1D 0/0 0.000000
する 動詞,自立,*,*,サ変・スル,基本形,する,スル,スル
。 記号,句点,*,*,*,*,。,。,。
EOS