import dgl
import networkx as nx
import numpy as np
import torch
from dgl import function as fn
from matplotlib import pyplot as plt
def build_simple_graph(show_plt: bool = False) -> dgl.DGLGraph:
g = dgl.DGLGraph()
g.add_nodes(3)
src = np.array([0, 0, 1, 2])
dst = np.array([1, 2, 2, 0])
g.add_edges(src, dst)
print("node:", g.number_of_nodes())
print("edge:", g.number_of_edges())
nx.draw(g.to_networkx(), node_size=3)
if show_plt:
plt.show()
return g
def pagerank_builtin(g: dgl.DGLGraph, damp: float = 0.85) -> None:
g.ndata['pv'] /= g.ndata['deg']
g.update_all(message_func=fn.copy_src(src='pv', out='m'),
reduce_func=fn.sum(msg='m', out='m_sum'))
N = g.number_of_nodes()
g.ndata['pv'] = (1 - damp) / N + damp * g.ndata['m_sum']
def main() -> None:
F = build_simple_graph()
node_cnt = F.number_of_nodes()
F.ndata['pv'] = torch.ones(node_cnt) / node_cnt
F.ndata['deg'] = F.out_degrees(F.nodes()).float()
print("init vals:", F.ndata['pv'])
for _ in range(3):
pagerank_builtin(F)
print('step %d:' % _, F.ndata['pv'])
if __name__ == '__main__':
main()
$ python pagerank.py
Using backend: pytorch
node: 3
edge: 4
init vals: tensor([0.3333, 0.3333, 0.3333]) # 初期値1/3
step 0: tensor([0.3333, 0.1917, 0.4750])
step 1: tensor([0.4538, 0.1917, 0.3546])
step 2: tensor([0.3514, 0.2428, 0.4058])
索引語: 索引語が含まれる文書番号リスト(unique, order by 文書番号 or 単語スコア) or 出現頻度 or 文書中位置情報
の辞書
Dict[str, List[int]]
文書番号:(開始位置,終了位置)
text="おはよう"
p text.chars.each_cons(2).map(&:join) #=>["おは", "はよ", "よう"]
text="さくらさくらいま"
(0..._=text.size).map{text[_1,_]}.sort
#=> ["いま", "くらいま", "くらさくらいま", "さくらいま",
# "さくらさくらいま", "ま", "らいま", "らさくらいま"]
d-gaps
) をコード化cal_d_gap=->inv{inv.each_cons(2).map{_1.inject(:-).abs}}
p cal_d_gap[[1, 5, 9, 18, 23, 24, 30, 44, 45, 48]]
#=> [4, 4, 9, 5, 1, 6, 14, 1, 3]
(文書番号d, 連結リスト(この場合は転置リスト)の位置p)
# sorted
inv_lst = [1, 5, 9, 18, 23, 24, 30, 44, 45, 48]
mk_v_dgaps =->inv{
inv.each_cons(2)
.map{_1.inject(:-).abs}}
mk_p_skips =->inv,n{
[*inv.each_with_index]
.each_slice(n)
.map(&:first)[1..]}
search_d =->d,gaps,skips{
_m=skips[0][1]
_d,_p=skips.filter{|a,_|a<=d}[-1]
(_p..._e=_p+_m).map{|i|
_d==d ? (return i;break) :
i==_e ? (return -1;break): (_d+=gaps[i])}}
search_d[45,
mk_v_dgaps[inv_lst],
mk_p_skips[inv_lst,3]]
#=> 8