Run Settings
Language: Go
Language Version
Run Command
package main import ( "./fileio" "fmt" "os" "sort" "strconv" "strings" ) type Morph struct { surface string base string pos string pos1 string } type Chunk struct { morphs []Morph dst int srcs []int } type wordSet struct { phrase string particle string } func parseArticle(lines []string) [][]Chunk { article := make([][]Chunk, 0) morphemes := make([]Morph, 0) sentence := make([]Chunk, 0) var chunk Chunk for _, line := range lines { if line == "EOS" { if len(morphemes) > 0 { chunk.morphs = morphemes sentence = append(sentence, chunk) morphemes = make([]Morph, 0) } if len(sentence) > 0 { initSourceIndex(sentence) article = append(article, sentence) sentence = make([]Chunk, 0) } continue } if line[0] == '*' { if len(morphemes) > 0 { chunk.morphs = morphemes sentence = append(sentence, chunk) morphemes = make([]Morph, 0) } //* 文節番号 係り先の文節番号(係り先なし:-1) 主辞の形態素番号/機能語の形態素番号 係り関係のスコア words := strings.Split(line, " ") // Remove "D" dst, err := strconv.Atoi(words[2][:len(words[2])-1]) if err != nil { panic(err) } chunk = initChunk(chunk, dst) continue } //表層形\t品詞,品詞細分類1,品詞細分類2,品詞細分類3,活用形,活用型,原形,読み,発音 word := strings.Split(line, "\t") words := strings.Split(word[1], ",") morpheme := Morph{ surface: word[0], base: words[6], pos: words[0], pos1: words[1], } morphemes = append(morphemes, morpheme) } return article } func initChunk(chunk Chunk, dst int) Chunk { chunk.morphs = make([]Morph, 0) chunk.dst = dst chunk.srcs = make([]int, 0) return chunk } func initSourceIndex(sentence []Chunk) { for i, chunk := range sentence { if chunk.dst < 0 { continue } sentence[chunk.dst].srcs = append(sentence[chunk.dst].srcs, i) } } func containVerb(morphs []Morph) bool { for _, morph := range morphs { if morph.pos == "動詞" { return true } } return false } func containParticle(morphs []Morph) bool { for _, morph := range morphs { if morph.pos == "助詞" { return true } } return false } func containParticleWo(morphs []Morph) bool { for _, morph := range morphs { if morph.pos == "助詞" && morph.surface == 
"を" { return true } } return false } func fetchVerb(morphs []Morph) string { for _, morph := range morphs { if morph.pos == "動詞" { return morph.base } } return "" } func fetchParticleAll(morphs []Morph) string { particles := "" for _, morph := range morphs { if morph.pos == "助詞" { particles += morph.base + " " } } return strings.TrimRight(particles, " ") } func fetchSahenSetsuzokuNounWo(morphs []Morph) string { for i, morph := range morphs { if morph.pos != "名詞" || morph.pos1 != "サ変接続" { continue } if i+1 < len(morphs) { if morphs[i+1].surface == "を" { return morph.surface + "を" } } } return "" } func joinPhrase(morphs []Morph) string { joinString := "" for _, morph := range morphs { if morph.pos == "記号" { continue } joinString += morph.surface } return joinString } func printVerbCases(sentence []Chunk) { for _, chunk := range sentence { if containVerb(chunk.morphs) == false { continue } sahenNounWo := "" for _, index := range chunk.srcs { sahenNounWo = fetchSahenSetsuzokuNounWo(sentence[index].morphs) if sahenNounWo == "" { continue } predicate := sahenNounWo + fetchVerb(chunk.morphs) wordSets := make([]wordSet, 0) var set wordSet for _, i := range chunk.srcs { if i == index { continue } if containParticle(sentence[i].morphs) { set.particle = fetchParticleAll(sentence[i].morphs) set.phrase = joinPhrase(sentence[i].morphs) wordSets = append(wordSets, set) } } sort.SliceStable(wordSets, func(i, j int) bool { return wordSets[i].particle < wordSets[j].particle }) particles := "" phrases := "" for _, set := range wordSets { particles += set.particle + " " phrases += set.phrase + " " } if particles != "" { particles = strings.TrimRight(particles, " ") phrases = strings.TrimRight(phrases, " ") fmt.Printf("%s\t%s\t%s\n", predicate, particles, phrases) } } } } func main() { if len(os.Args) != 2 { fmt.Println("Usage: main <filepath>") os.Exit(1) } lines := fileio.ReadFileAllLines(os.Args[1]) article := parseArticle(lines) for _, sentence := range article { 
printVerbCases(sentence) } }
package fileio import ( "bufio" "fmt" "io" "os" ) func WriteFileAllLines(fileName string, lines []string) { file, err := os.OpenFile(fileName, os.O_WRONLY|os.O_CREATE, 0666) if err != nil { panic(err) } defer file.Close() w := bufio.NewWriter(file) for _, line := range lines { fmt.Fprintln(w, line) } w.Flush() } func WriteFileString(fileName string, str string) { file, err := os.Create(fileName) if err != nil { panic(err) } defer file.Close() file.Write([]byte(str)) } func ReadAllLine(r io.Reader) []string { sc := bufio.NewScanner(r) lines := []string{} for sc.Scan() { lines = append(lines, sc.Text()) } return lines } func ReadFileAllLines(fileName string) []string { fp, err := os.Open(fileName) if err != nil { panic(err) } defer fp.Close() lines := ReadAllLine(fp) return lines }
#!/bin/bash
# Place fileio.go in its own package directory, build main.go, and run the
# resulting binary on input.txt.

# Stop at the first failing step so a failed build never runs a stale ./main.
set -e

# -p keeps re-runs from failing when the directory already exists.
mkdir -p fileio

# On a re-run fileio.go has already been moved; only move it when present.
if [ -f fileio.go ]; then
    mv fileio.go fileio
fi

go build main.go
./main input.txt
* 0 3D 0/0 0.455238   記号,空白,*,*,*,*, , ,  * 1 2D 0/0 1.021216 別段 副詞,助詞類接続,*,*,*,*,別段,ベツダン,ベツダン * 2 3D 0/2 2.294919 くる 動詞,自立,*,*,カ変・クル,基本形,くる,クル,クル に 助詞,格助詞,一般,*,*,*,に,ニ,ニ も 助詞,係助詞,*,*,*,*,も,モ,モ * 3 7D 0/3 -1.816825 及ば 動詞,自立,*,*,五段・バ行,未然形,及ぶ,オヨバ,オヨバ ん 助動詞,*,*,*,不変化型,基本形,ん,ン,ン さ 助詞,終助詞,*,*,*,*,さ,サ,サ と 助詞,格助詞,引用,*,*,*,と,ト,ト 、 記号,読点,*,*,*,*,、,、,、 * 4 7D 0/1 -1.816825 主人 名詞,一般,*,*,*,*,主人,シュジン,シュジン は 助詞,係助詞,*,*,*,*,は,ハ,ワ * 5 7D 0/1 -1.816825 手紙 名詞,一般,*,*,*,*,手紙,テガミ,テガミ に 助詞,格助詞,一般,*,*,*,に,ニ,ニ * 6 7D 0/1 -1.816825 返事 名詞,サ変接続,*,*,*,*,返事,ヘンジ,ヘンジ を 助詞,格助詞,一般,*,*,*,を,ヲ,ヲ * 7 -1D 0/0 0.000000 する 動詞,自立,*,*,サ変・スル,基本形,する,スル,スル 。 記号,句点,*,*,*,*,。,。,。 EOS
Editor Settings
Theme
Key bindings
Full width
Lines